diff --git a/frontend/mainwindow.h b/frontend/mainwindow.h index 1a67aeb..9b33c5c 100644 --- a/frontend/mainwindow.h +++ b/frontend/mainwindow.h @@ -47,7 +47,6 @@ #include #include "search-app-widget-plugin/search.h" -#include "index-generator.h" #include "libsearch.h" #include "create-index-ask-dialog.h" #include "search-line-edit.h" diff --git a/libsearch/common.h b/libsearch/common.h index b7ccb42..1b0c9e7 100644 --- a/libsearch/common.h +++ b/libsearch/common.h @@ -1,8 +1,9 @@ -#pragma once +#pragma once #include #include #include +namespace UkuiSearch { #define CONTENT_DATABASE_PATH_SLOT 1 #define CONTENT_DATABASE_SUFFIX_SLOT 2 @@ -12,7 +13,6 @@ static const int LABEL_MAX_WIDTH = 300; static const QString HOME_PATH = QDir::homePath(); static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data"); static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data"); -static const QString OCR_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/ocr_index_data"); static const QString FILE_SEARCH_VALUE = QStringLiteral("0"); static const QString DIR_SEARCH_VALUE = QStringLiteral("1"); static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem"); @@ -67,4 +67,23 @@ static const QMap targetPhotographTypeMap = { {"jpeg", true} // TODO 待完善,后续改为配置文件 }; //TODO Put things that needed to be put here here. +/** + * @brief The DataBaseType enum + * Basic 基础数据库 + * Content 文本内容数据库 + */ +enum class DataBaseType { + Basic = 0, + Content = 1 +}; +/** + * @brief The IndexType enum + * Index type, notice:OCR index is part of Content index. 
+ */ +enum class IndexType { + Basic, + Contents, + OCR +}; +} diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index c7d09e3..331a47c 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -218,7 +218,7 @@ void FileUtils::loadHanziTable(const QString &fileName) { return; } -QMimeType FileUtils::getMimetype(QString &path) { +QMimeType FileUtils::getMimetype(const QString &path) { QMimeDatabase mdb; QMimeType type = mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent); @@ -488,7 +488,7 @@ QStringList FileUtils::findMultiToneWords(const QString &hanzi) { * @param path: abs path * @return docx to QString */ -void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { +void FileUtils::getDocxTextContent(const QString &path, QString &textcontent) { //fix me :optimized by xpath?? QFileInfo info = QFileInfo(path); if(!info.exists() || info.isDir()) @@ -548,7 +548,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { */ } -void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { +void FileUtils::getPptxTextContent(const QString &path, QString &textcontent) { QFileInfo info = QFileInfo(path); if(!info.exists() || info.isDir()) return; @@ -660,7 +660,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { */ } -void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { +void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent) { QFileInfo info = QFileInfo(path); if(!info.exists() || info.isDir()) return; @@ -724,7 +724,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { */ } -void FileUtils::getPdfTextContent(QString &path, QString &textcontent) { +void FileUtils::getPdfTextContent(const QString &path, QString &textcontent) { Poppler::Document *doc = Poppler::Document::load(path); if(doc->isLocked()) { delete doc; @@ -733,15 +733,19 @@ void FileUtils::getPdfTextContent(QString &path, QString 
&textcontent) { const QRectF qf; int pageNum = doc->numPages(); for(int i = 0; i < pageNum; ++i) { - textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " ")); - if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) - break; + Poppler::Page *page = doc->page(i); + if(page) { + textcontent.append(page->text(qf).replace("\n", "").replace("\r", " ")); + delete page; + if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) + break; + } } delete doc; return; } -void FileUtils::getTxtContent(QString &path, QString &textcontent) { +void FileUtils::getTxtContent(const QString &path, QString &textcontent) { QFile file(path); if(!file.open(QIODevice::ReadOnly | QIODevice::Text)) return; @@ -935,7 +939,7 @@ QIcon FileUtils::iconFromTheme(const QString &name, const QIcon &iconDefault) return QIcon::fromTheme(name, iconDefault); } -bool FileUtils::isOpenXMLFileEncrypted(QString &path) +bool FileUtils::isOpenXMLFileEncrypted(const QString &path) { QFile file(path); file.open(QIODevice::ReadOnly|QIODevice::Text); @@ -954,52 +958,53 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path) } } //todo: only support docx, pptx, xlsx -bool FileUtils::isEncrypedOrUnreadable(QString path) +bool FileUtils::isEncrypedOrUnsupport(const QString& path, const QString& suffix) { QMimeType type = FileUtils::getMimetype(path); QString name = type.name(); - QFileInfo file(path); - QString strsfx = file.suffix(); + if(name == "application/zip") { - if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") { + if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") { return FileUtils::isOpenXMLFileEncrypted(path); - } else if (strsfx == "uot" || strsfx == "uos" || strsfx == "uop") { + } else if (suffix == "uot" || suffix == "uos" || suffix == "uop") { return false; - } else if (strsfx == "ofd") { + } else if (suffix == "ofd") { return false; } else { return true; } } else if(name == "text/plain") { - if(strsfx.endsWith("txt")) + if(suffix.endsWith("txt")) return false; 
return true; } else if(name == "text/html") { - if(strsfx.endsWith("html")) + if(suffix.endsWith("html")) return false; return true; } else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") { - if(strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" || - strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls" || strsfx == "uof") { + if(suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" || + suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") { return false; } return true; } else if(name == "application/pdf") { - if(strsfx == "pdf") + if(suffix == "pdf") return false; return true; } else if(name == "application/xml" || name == "application/uof") { - if(strsfx == "uof") { + if(suffix == "uof") { return false; } return true; + } else if (true == targetPhotographTypeMap[suffix]) { + return !isOcrSupportSize(path); } else { - qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]"; +// qInfo() << "Unsupport format:[" << path << "][" << type.name() << "]"; return true; } } @@ -1020,7 +1025,7 @@ bool FileUtils::isOcrSupportSize(QString path) */ QImage file(path); if (file.height() < OCR_MIN_SIZE or file.width() < OCR_MIN_SIZE) {//限制图片像素尺寸 - qDebug() << "file:" << path << "is not right size."; +// qDebug() << "file:" << path << "is not right size."; return false; } else return true; @@ -1099,7 +1104,7 @@ QString FileUtils::wrapData(QLabel *p_label, const QString &text) * * 目标:文本串 */ -void FileUtils::getUOFTextContent(QString &path, QString &textContent) +void FileUtils::getUOFTextContent(const QString &path, QString &textContent) { QFileInfo info(path); if (!info.exists() || info.isDir()) { @@ -1278,7 +1283,7 @@ inline void FileUtils::findNodeAttr(const QDomElement &elem, QQueue &na * @param path * @param textContent */ -void FileUtils::getUOF2TextContent(QString &path, QString &textContent) +void 
FileUtils::getUOF2TextContent(const QString &path, QString &textContent) { QFileInfo info = QFileInfo(path); if (!info.exists() || info.isDir()) @@ -1312,7 +1317,7 @@ void FileUtils::getUOF2TextContent(QString &path, QString &textContent) file.close(); } -void FileUtils::getUOF2PPTContent(QString &path, QString &textContent) +void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent) { QFileInfo info = QFileInfo(path); if (!info.exists() || info.isDir()) @@ -1409,7 +1414,7 @@ inline bool FileUtils::loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, cons * @param path * @param textContent */ -void FileUtils::getOFDTextContent(QString &path, QString &textContent) +void FileUtils::getOFDTextContent(const QString &path, QString &textContent) { QFileInfo info = QFileInfo(path); if (!info.exists() || info.isDir()) diff --git a/libsearch/file-utils.h b/libsearch/file-utils.h index d141f50..a24b1f2 100644 --- a/libsearch/file-utils.h +++ b/libsearch/file-utils.h @@ -85,24 +85,30 @@ public: static void loadHanziTable(const QString &fileName); //parse text,docx..... 
- static QMimeType getMimetype(QString &path); - static void getDocxTextContent(QString &path, QString &textcontent); - static void getPptxTextContent(QString &path, QString &textcontent); - static void getXlsxTextContent(QString &path, QString &textcontent); - static void getPdfTextContent(QString &path, QString &textcontent); - static void getTxtContent(QString &path, QString &textcontent); - static void getUOFTextContent(QString &path, QString &textContent); - static void getUOF2TextContent(QString &path, QString &textContent); - static void getUOF2PPTContent(QString &path, QString &textContent); - static void getOFDTextContent(QString &path, QString &textContent); + static QMimeType getMimetype(const QString &path); + static void getDocxTextContent(const QString &path, QString &textcontent); + static void getPptxTextContent(const QString &path, QString &textcontent); + static void getXlsxTextContent(const QString &path, QString &textcontent); + static void getPdfTextContent(const QString &path, QString &textcontent); + static void getTxtContent(const QString &path, QString &textcontent); + static void getUOFTextContent(const QString &path, QString &textContent); + static void getUOF2TextContent(const QString &path, QString &textContent); + static void getUOF2PPTContent(const QString &path, QString &textContent); + static void getOFDTextContent(const QString &path, QString &textContent); static int openFile(QString &path, bool openInDir = false); static bool copyPath(QString &path); static QString escapeHtml(const QString &str); static QString chineseSubString(const std::string &myStr,int start,int length); static QIcon iconFromTheme(const QString &name, const QIcon &iconDefault); - static bool isOpenXMLFileEncrypted(QString &path); - static bool isEncrypedOrUnreadable(QString path); + static bool isOpenXMLFileEncrypted(const QString &path); + /** + * @brief isEncrypedOrUnsupport + * @param path + * @param suffix + * @return true if file(path) is not a support 
format for content index. + */ + static bool isEncrypedOrUnsupport(const QString &path, const QString &suffix); static bool isOcrSupportSize(QString path); static size_t maxIndexCount; static unsigned short indexStatus; diff --git a/libsearch/filesystemwatcher/file-system-watcher-private.h b/libsearch/filesystemwatcher/file-system-watcher-private.h index 277ed2d..9345270 100644 --- a/libsearch/filesystemwatcher/file-system-watcher-private.h +++ b/libsearch/filesystemwatcher/file-system-watcher-private.h @@ -27,8 +27,7 @@ #include #include #include - -#include "traverse-bfs.h" +#include namespace UkuiSearch { class FileSystemWatcherPrivate @@ -39,10 +38,10 @@ public: ~FileSystemWatcherPrivate(); void addWatch(const QStringList &pathList); + void addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList); QStringList removeWatch(const QString &path); QString removeWatch(int wd); - - + void clearAll(); private: void init(); @@ -56,8 +55,6 @@ private: QSocketNotifier* m_notifier = nullptr; // wd -> url QHash m_watchPathHash; - - QThreadPool *m_pool = nullptr; FileSystemWatcher *q = nullptr; bool m_recursive = true; diff --git a/libsearch/filesystemwatcher/file-system-watcher.cpp b/libsearch/filesystemwatcher/file-system-watcher.cpp index 99b12e9..39bf4f8 100644 --- a/libsearch/filesystemwatcher/file-system-watcher.cpp +++ b/libsearch/filesystemwatcher/file-system-watcher.cpp @@ -39,19 +39,15 @@ FileSystemWatcherPrivate::FileSystemWatcherPrivate(FileSystemWatcher *parent) : qDebug() << "setInotifyMaxUserWatches end"; init(); - m_pool = new QThreadPool; } FileSystemWatcherPrivate::~FileSystemWatcherPrivate() { + close(m_inotifyFd); if(m_notifier) { delete m_notifier; m_notifier = nullptr; } - if(m_pool) { - delete m_pool; - m_pool = nullptr; - } } void FileSystemWatcherPrivate::traverse(QStringList pathList) @@ -66,13 +62,12 @@ void FileSystemWatcherPrivate::traverse(QStringList pathList) } QFileInfoList list; QDir dir; - dir.setFilter(QDir::Dirs | 
QDir::Files | QDir::NoDotAndDotDot); - dir.setSorting(QDir::DirsFirst); + dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot); while(!queue.empty()) { dir.setPath(queue.dequeue()); list = dir.entryInfoList(); for(auto i : list) { - if(i.isDir() && (!(i.isSymLink()))) { + if(!(i.isSymLink())) { queue.enqueue(i.absoluteFilePath()); addWatch(i.absoluteFilePath()); } @@ -95,14 +90,54 @@ void FileSystemWatcherPrivate::addWatch(const QString &path) void FileSystemWatcherPrivate::addWatch(const QStringList &pathList) { - QtConcurrent::run(m_pool, [ = ](){ - traverse(pathList); - }); + traverse(pathList); +} + +void FileSystemWatcherPrivate::addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList) +{ + QQueue bfs; + QStringList tmpPathList = pathList; + for(QString blackPath : blackList) { + for(QString path : pathList) { + if(FileUtils::isOrUnder(path, blackPath)) { + tmpPathList.removeOne(path); + } + } + } + for(QString path : tmpPathList) { + addWatch(path); + bfs.enqueue(path); + } + + QFileInfoList list; + QDir dir; + QStringList tmpList = blackList; + dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot); + while(!bfs.empty()) { + dir.setPath(bfs.dequeue()); + list = dir.entryInfoList(); + for(auto i : list) { + bool isBlocked = false; + for(QString path : tmpList) { + if(i.absoluteFilePath() == path) { + isBlocked = true; + tmpList.removeOne(path); + break; + } + } + if(isBlocked) + continue; + + if(!(i.isSymLink())) { + addWatch(i.absoluteFilePath()); + bfs.enqueue(i.absoluteFilePath()); + } + } + } } QStringList FileSystemWatcherPrivate::removeWatch(const QString &path) { - m_pool->waitForDone(); QStringList paths; for(int wd : m_watchPathHash.keys()) { QString tmpPath = m_watchPathHash.value(wd); @@ -113,7 +148,6 @@ QStringList FileSystemWatcherPrivate::removeWatch(const QString &path) } } } - return paths; } @@ -123,6 +157,20 @@ QString FileSystemWatcherPrivate::removeWatch(int wd) return m_watchPathHash.take(wd); } +void 
FileSystemWatcherPrivate::clearAll() +{ + if(m_inotifyFd > 0) { + close(m_inotifyFd); + m_inotifyFd = -1; + } + if(m_notifier) { + delete m_notifier; + m_notifier = nullptr; + } + m_watchPathHash.clear(); + init(); +} + void FileSystemWatcherPrivate::init() { if(m_inotifyFd < 0) { @@ -177,14 +225,24 @@ void FileSystemWatcher::addWatch(const QString &path) d->addWatch(QStringList(path)); } +void FileSystemWatcher::addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList) +{ + d->addWatchWithBlackList(pathList, blackList); +} + QStringList FileSystemWatcher::removeWatch(const QString &path) { return d->removeWatch(path); } +void FileSystemWatcher::clearAll() +{ + d->clearAll(); +} + void FileSystemWatcher::eventProcess(int socket) { - qDebug() << "-----begin event process-----"; +// qDebug() << "-----begin event process-----"; int avail; if (ioctl(socket, FIONREAD, &avail) == EINVAL) { qWarning() << "Did not receive an entire inotify event."; @@ -214,7 +272,7 @@ void FileSystemWatcher::eventProcess(int socket) return; } - qDebug() << "event mask:" << event->mask; +// qDebug() << "event mask:" << event->mask; QString path; if (event->mask & (EventDeleteSelf | EventMoveSelf)) { @@ -224,7 +282,7 @@ void FileSystemWatcher::eventProcess(int socket) } if(event->mask & EventCreate) { - qDebug() << path << "--EventCreate"; +// qDebug() << path << "--EventCreate"; Q_EMIT created(path, event->mask & IN_ISDIR); if(event->mask & IN_ISDIR && d->m_recursive) { if(!QFileInfo(path).isSymLink()){ @@ -233,34 +291,34 @@ void FileSystemWatcher::eventProcess(int socket) } } if (event->mask & EventDeleteSelf) { - qDebug() << path << "--EventDeleteSelf"; - d->removeWatch(event->wd); - Q_EMIT deleted(path, true); +// qDebug() << path << "--EventDeleteSelf"; + if(event->mask & IN_ISDIR) { + d->removeWatch(event->wd); + } + Q_EMIT deleted(path, event->mask & IN_ISDIR); } if (event->mask & EventDelete) { - qDebug() << path << "--EventDelete"; +// qDebug() << path << 
"--EventDelete"; // we watch all folders recursively. Thus, folder removing is reported in DeleteSelf. if (!(event->mask & IN_ISDIR)) { Q_EMIT deleted(path, false); } } if (event->mask & EventModify) { - qDebug() << path << "--EventModify"; +// qDebug() << path << "--EventModify"; Q_EMIT modified(path); } if (event->mask & EventMoveSelf) { - qDebug() << path << "--EventMoveSelf"; - d->removeWatch(event->wd); - Q_EMIT moved(path, true); + //Problematic if the parent is not watched, otherwise + // handled by MoveFrom/MoveTo from the parent +// qDebug() << path << "--EventMoveSelf"; } if (event->mask & EventMoveFrom) { - qDebug() << path << "--EventMoveFrom"; - if (!(event->mask & IN_ISDIR)) { - Q_EMIT moved(path, false); - } +// qDebug() << path << "--EventMoveFrom"; + Q_EMIT moved(path, event->mask & IN_ISDIR); } if (event->mask & EventMoveTo) { - qDebug() << path << "--EventMoveTo"; +// qDebug() << path << "--EventMoveTo"; Q_EMIT created(path, event->mask & IN_ISDIR); if (event->mask & IN_ISDIR && d->m_recursive) { if(!QFileInfo(path).isSymLink()){ @@ -269,11 +327,11 @@ void FileSystemWatcher::eventProcess(int socket) } } if (event->mask & EventOpen) { - qDebug() << path << "--EventOpen"; +// qDebug() << path << "--EventOpen"; Q_EMIT opened(path); } if (event->mask & EventUnmount) { - qDebug() << path << "--EventUnmount"; +// qDebug() << path << "--EventUnmount"; if (event->mask & IN_ISDIR) { d->removeWatch(event->wd); } @@ -284,23 +342,23 @@ void FileSystemWatcher::eventProcess(int socket) } } if (event->mask & EventAttributeChange) { - qDebug() << path << "--EventAttributeChange"; +// qDebug() << path << "--EventAttributeChange"; Q_EMIT attributeChanged(path); } if (event->mask & EventAccess) { - qDebug() << path << "--EventAccess"; +// qDebug() << path << "--EventAccess"; Q_EMIT accessed(path); } if (event->mask & EventCloseWrite) { - qDebug() << path << "--EventCloseWrite"; +// qDebug() << path << "--EventCloseWrite"; Q_EMIT closedWrite(path); } if (event->mask 
& EventCloseRead) { - qDebug() << path << "--EventCloseRead"; +// qDebug() << path << "--EventCloseRead"; Q_EMIT closedRead(path); } if (event->mask & EventIgnored) { - qDebug() << path << "--EventIgnored"; +// qDebug() << path << "--EventIgnored"; } i += sizeof(struct inotify_event) + event->len; } diff --git a/libsearch/filesystemwatcher/file-system-watcher.h b/libsearch/filesystemwatcher/file-system-watcher.h index 647d17e..535be68 100644 --- a/libsearch/filesystemwatcher/file-system-watcher.h +++ b/libsearch/filesystemwatcher/file-system-watcher.h @@ -101,7 +101,9 @@ public: public Q_SLOTS: void addWatch(const QStringList &pathList); void addWatch(const QString &path); + void addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList); QStringList removeWatch(const QString &path); + void clearAll(); Q_SIGNALS: diff --git a/libsearch/index/basic-indexer.cpp b/libsearch/index/basic-indexer.cpp new file mode 100644 index 0000000..17631bd --- /dev/null +++ b/libsearch/index/basic-indexer.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#include "basic-indexer.h" +#include "file-utils.h" +#include +#include +using namespace UkuiSearch; +BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath) +{ +} + +bool BasicIndexer::index() +{ + QFileInfo info = QFileInfo(m_filePath); + + //添加数据 + m_document.setData(m_filePath); + //唯一term + m_document.setUniqueTerm(FileUtils::makeDocUterm(m_filePath)); + //上层文件夹term,用于在上层文件夹删除时删除自己 + m_document.addTerm(FileUtils::makeDocUterm(m_filePath.section("/", 0, -2, QString::SectionIncludeLeadingSep))); + //1-目录, 0-文件 + m_document.addValue(1, QString((info.isDir() && (!info.isSymLink())) ? "1" : "0")); + //修改时间 + QString time = info.lastModified().toString("yyyyMMddHHmmsszzz"); + m_document.setIndexTime(time); + m_document.addSortableSerialiseValue(2, time); + + QString indexName = info.fileName().toLower(); + QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName()); + + int i = 0; + int postingCount = 1; //terms post of Xapian document is started from 1! + while(postingCount <= indexName.size()) { + m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount); + ++postingCount; + ++i; + } + for(QString& s : pinyinTextList) { + i = 0; + while(i < s.size()) { + m_document.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(), postingCount); + ++postingCount; + ++i; + } + } + return true; +} diff --git a/libsearch/index/basic-indexer.h b/libsearch/index/basic-indexer.h new file mode 100644 index 0000000..820b608 --- /dev/null +++ b/libsearch/index/basic-indexer.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#ifndef BASICINDEXER_H +#define BASICINDEXER_H + +#include "document.h" +namespace UkuiSearch { +class BasicIndexer +{ +public: + BasicIndexer(const QString& filePath); + bool index(); + Document document() { return m_document; } +private: + QString m_filePath; + Document m_document; +}; +} +#endif // BASICINDEXER_H diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp deleted file mode 100644 index f67abe5..0000000 --- a/libsearch/index/construct-document.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Authors: zhangpengfei - * - */ -#include "construct-document.h" -#include "file-utils.h" -#include "chinese-segmentation.h" -#include -#include -#include - -//extern QList *g_docListForPath; -//extern QMutex g_mutexDocListForPath; -using namespace UkuiSearch; -ConstructDocumentForPath::ConstructDocumentForPath(QVector list) { - this->setAutoDelete(true); - m_list = std::move(list); -} - -void ConstructDocumentForPath::run() { -// qDebug()<<"ConstructDocumentForPath"; -// if(!UkuiSearch::g_docListForPath) -// UkuiSearch::g_docListForPath = new QVector; -// qDebug()<size(); - QString index_text = m_list.at(0).toLower(); - QString sourcePath = m_list.at(1); - Document doc; - - //多音字版 - //现加入首字母 - QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".", "")); -// if(!pinyin_text_list.isEmpty()) -// { -// for (QString& i : pinyin_text_list){ -// i.replace("", " "); -// i = i.simplified(); -// } -// doc.setIndexText(pinyin_text_list); -// } - - QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath)); - QString upTerm = QString::fromStdString("ZEEKERUPTERM" + FileUtils::makeDocUterm(sourcePath.section("/", 0, -2, QString::SectionIncludeLeadingSep))); -// qDebug()<<"sourcePath"<setAutoDelete(true); - m_path = std::move(path); -} - -void ConstructDocumentForContent::run() { -// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId(); - //构造文本索引的document - QString content; - QString suffix; - FileReader::getTextContent(m_path, content, suffix); - - Document doc; - doc.setUniqueTerm(FileUtils::makeDocUterm(m_path)); - doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); - doc.addValue(1, m_path); - doc.addValue(2, suffix); - - if(content.isEmpty()) { - doc.reuireDeleted(); - } else { - doc.setData(content); - //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. 
- content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); - std::vector term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString()); - for(size_t i = 0; i < term.size(); ++i) { - doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); - } - term.clear(); - term.shrink_to_fit(); - } - IndexGenerator::g_mutexDocListForContent.lock(); - IndexGenerator::g_docListForContent.append(doc); - IndexGenerator::g_mutexDocListForContent.unlock(); - content.clear(); - content.squeeze(); - - return; -} - -ConstructDocumentForOcr::ConstructDocumentForOcr(QString path) -{ - this->setAutoDelete(true); - m_path = std::move(path); -} - -void ConstructDocumentForOcr::run() -{ - QString content; - QString suffix; - FileReader::getTextContent(m_path, content, suffix); - - Document doc; - doc.setUniqueTerm(FileUtils::makeDocUterm(m_path)); - doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); - doc.addValue(1, m_path); - doc.addValue(2, suffix); - - if(content.isEmpty()) { - doc.reuireDeleted(); - } else { - doc.setData(content); - //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. 
- content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); - std::vector term = ChineseSegmentation::getInstance()->callSegment(content.toStdString()); - for(size_t i = 0; i < term.size(); ++i) { - doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); - } - term.clear(); - term.shrink_to_fit(); - } - IndexGenerator::g_mutexDocListForOcr.lock(); - IndexGenerator::g_docListForOcr.append(doc); - IndexGenerator::g_mutexDocListForOcr.unlock(); - content.clear(); - content.squeeze(); -} - diff --git a/libsearch/index/construct-document.h b/libsearch/index/construct-document.h deleted file mode 100644 index dbe0486..0000000 --- a/libsearch/index/construct-document.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Authors: zhangpengfei - * - */ -#ifndef CONSTRUCTDOCUMENT_H -#define CONSTRUCTDOCUMENT_H - -#include -#include -//#include -#include "document.h" -#include "index-generator.h" - -namespace UkuiSearch { - -class IndexGenerator; -class ConstructDocumentForPath : public QRunnable { -public: - explicit ConstructDocumentForPath(QVector list); - ~ConstructDocumentForPath() = default; -protected: - void run(); -private: - QVector m_list; -}; - -class ConstructDocumentForContent : public QRunnable { -public: - explicit ConstructDocumentForContent(QString path); - ~ConstructDocumentForContent() = default; -protected: - void run(); -private: - QString m_path; -}; - -class ConstructDocumentForOcr : public QRunnable { -public: - explicit ConstructDocumentForOcr(QString path); - ~ConstructDocumentForOcr() = default; -protected: - void run(); -private: - QString m_path; -}; -} - -#endif // CONSTRUCTDOCUMENT_H diff --git a/libsearch/index/database.cpp b/libsearch/index/database.cpp new file mode 100644 index 0000000..dbb3b46 --- /dev/null +++ b/libsearch/index/database.cpp @@ -0,0 +1,29 @@ +#include "database.h" + +using namespace UkuiSearch; +Database::Database(const DataBaseType &type) +{ + switch (type) { + case DataBaseType::Basic: + m_database = new Xapian::Database(INDEX_PATH.toStdString()); + break; + case DataBaseType::Content: + m_database = new Xapian::Database(CONTENT_INDEX_PATH.toStdString()); + break; + default: + break; + } +} + +Database::~Database() +{ + if(m_database) { + delete m_database; + m_database = nullptr; + } +} + +uint Database::getIndexDocCount() +{ + return m_database->get_doccount(); +} diff --git a/libsearch/index/database.h b/libsearch/index/database.h new file mode 100644 index 0000000..bd7497b --- /dev/null +++ b/libsearch/index/database.h @@ -0,0 +1,18 @@ +#ifndef DATABASE_H +#define DATABASE_H +#include +#include "common.h" +namespace UkuiSearch { + +class Database +{ +public: + Database(const DataBaseType &type); + ~Database(); + + 
uint getIndexDocCount(); +private: + Xapian::Database *m_database; +}; +} +#endif // DATABASE_H diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 98ed782..0fe66d7 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -84,43 +84,31 @@ void Document::setUniqueTerm(QString term) { return; m_document.add_term(term.toStdString()); -// m_unique_term = new QString(term); - m_unique_term = std::move(term.toStdString()); + m_uniqueTerm = std::move(term.toStdString()); } void Document::setUniqueTerm(std::string term) { if(term.empty()) return; m_document.add_term(term); - m_unique_term = term; + m_uniqueTerm = std::move(term); } -std::string Document::getUniqueTerm() { -// qDebug()<<"m_unique_term!"<<*m_unique_term; -// qDebug() << QString::fromStdString(m_unique_term.toStdString()); - return m_unique_term;//.toStdString(); +std::string Document::getUniqueTerm() const +{ + return m_uniqueTerm; } -void Document::setIndexText(QStringList indexText) { -// QStringList indexTextList = indexText; -// m_index_text = new QStringList(indexText); - m_index_text = std::move(indexText); -} - -QStringList Document::getIndexText() { - return m_index_text; -} - -Xapian::Document Document::getXapianDocument() { +Xapian::Document Document::getXapianDocument() const{ return m_document; } -void Document::reuireDeleted() +void Document::setIndexTime(const QString &time) { - m_shouldDelete = true; + m_time = time.toStdString(); } -bool Document::isRequiredDeleted() +const std::string Document::indexTime() const { - return m_shouldDelete; + return m_time; } diff --git a/libsearch/index/document.h b/libsearch/index/document.h index 40139d1..9464cbf 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -31,15 +31,13 @@ public: ~Document() {} Document(const Document& other) { m_document = other.m_document; - m_index_text = other.m_index_text; - m_unique_term = other.m_unique_term; - m_shouldDelete = other.m_shouldDelete; + 
m_uniqueTerm = other.m_uniqueTerm; + m_time = other.m_time; } void operator=(const Document& other) { m_document = other.m_document; - m_index_text = other.m_index_text; - m_unique_term = other.m_unique_term; - m_shouldDelete = other.m_shouldDelete; + m_uniqueTerm = other.m_uniqueTerm; + m_time = other.m_time; } void setData(QString &data); void addPosting(std::string term, QVector offset, int weight = 1); @@ -51,18 +49,15 @@ public: void addSortableSerialiseValue(unsigned slot, QString value); void setUniqueTerm(QString term); void setUniqueTerm(std::string term); - std::string getUniqueTerm(); - void setIndexText(QStringList indexText); - QStringList getIndexText(); - Xapian::Document getXapianDocument(); - void reuireDeleted(); - bool isRequiredDeleted(); + std::string getUniqueTerm() const; + Xapian::Document getXapianDocument() const; + void setIndexTime(const QString& time); + const std::string indexTime() const; + private: Xapian::Document m_document; - QStringList m_index_text; - //QString m_unique_term; - std::string m_unique_term; - bool m_shouldDelete = false; + std::string m_uniqueTerm; + std::string m_time; }; } diff --git a/libsearch/index/file-content-indexer.cpp b/libsearch/index/file-content-indexer.cpp new file mode 100644 index 0000000..368696e --- /dev/null +++ b/libsearch/index/file-content-indexer.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#include "file-content-indexer.h" +#include "file-reader.h" +#include "file-utils.h" +#include "chinese-segmentation.h" + +using namespace UkuiSearch; +fileContentIndexer::fileContentIndexer(const QString &filePath): m_filePath(filePath) +{ +} + +bool fileContentIndexer::index() +{ + QString content; + QFileInfo info(m_filePath); + QString suffix = info.suffix(); + FileReader::getTextContent(m_filePath, content, suffix); + if(content.isEmpty()) { + return false; + } + + m_document.setData(content); + //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. + content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); + std::vector term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString()); + content.clear(); + content.squeeze(); + + for(size_t i = 0; i < term.size(); ++i) { + m_document.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); + } + term.clear(); + term.shrink_to_fit(); + + m_document.setUniqueTerm(FileUtils::makeDocUterm(m_filePath)); + m_document.addTerm(FileUtils::makeDocUterm(m_filePath.section("/", 0, -2, QString::SectionIncludeLeadingSep))); + m_document.addValue(1, m_filePath); + m_document.addValue(2, suffix); + m_document.setIndexTime(info.lastModified().toString("yyyyMMddHHmmsszzz")); + + return true; +} diff --git a/libsearch/index/file-content-indexer.h b/libsearch/index/file-content-indexer.h new file mode 100644 index 0000000..8e46f6f --- /dev/null +++ b/libsearch/index/file-content-indexer.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#ifndef FILECONTENTINDEXER_H +#define FILECONTENTINDEXER_H + +#include "document.h" +namespace UkuiSearch { +class fileContentIndexer +{ +public: + fileContentIndexer(const QString& filePath); + bool index(); + Document document() { return m_document; } + +private: + QString m_filePath; + Document m_document; +}; +} +#endif // FILECONTENTINDEXER_H diff --git a/libsearch/index/file-index-manager.cpp b/libsearch/index/file-index-manager.cpp deleted file mode 100644 index dab3f78..0000000 --- a/libsearch/index/file-index-manager.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "file-index-manager.h" -#include "dir-watcher.h" -#include "common.h" -using namespace UkuiSearch; -static FileIndexManager* global_instance = nullptr; -FileIndexManager::FileIndexManager(QObject *parent) : QObject(parent), m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Create) -{ - m_fi = FirstIndex::getInstance(); - m_iw = InotifyWatch::getInstance(); -} - -FileIndexManager *FileIndexManager::getInstance() -{ - if(!global_instance) { - global_instance = new FileIndexManager(); - } - return global_instance; -} - -void FileIndexManager::searchMethod(FileUtils::SearchMethod sm) { - qWarning() << "searchMethod start: " << static_cast(sm); - if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) { - 
FileUtils::searchMethod = sm; - } else { - qWarning("enum class error!!!\n"); - } - if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::indexStatus) { - qDebug() << "start first index"; - m_fi->rebuildDatebase(); - qDebug() << "start inotify index"; - if(!this->m_iw->isRunning()) { - this->m_iw->start(); - } - qDebug() << "Search method has been set to INDEXSEARCH"; - } - if(FileUtils::SearchMethod::DIRECTSEARCH == sm) { - m_iw->stopWatch(); - } - qWarning() << "searchMethod end: " << static_cast(FileUtils::searchMethod); -} - -void FileIndexManager::initIndexPathSetFunction() -{ - const QByteArray id(UKUI_SEARCH_SCHEMAS); - if(QGSettings::isSchemaInstalled(id)) { - m_searchSettings = new QGSettings(id); - if(!m_searchSettings->keys().contains(SEARCH_METHOD_KEY)) { - qWarning() << "Can not find gsettings key:" << UKUI_SEARCH_SCHEMAS << SEARCH_METHOD_KEY; - return; - } - } else { - qWarning() << "Can not find gsettings:" << UKUI_SEARCH_SCHEMAS; - return; - } - - connect(DirWatcher::getDirWatcher(), &DirWatcher::appendIndexItem, this, &FileIndexManager::handleIndexPathAppend, Qt::QueuedConnection); - connect(DirWatcher::getDirWatcher(), &DirWatcher::removeIndexItem, this, &FileIndexManager::handleRemovePathAppend, Qt::QueuedConnection); - - - DirWatcher::getDirWatcher()->initDbusService(); -} - -void FileIndexManager::handleIndexPathAppend(const QString path, const QStringList blockList) -{ - qDebug() << "Add Index path:" << path << " blockList:" << blockList; - if(!m_searchSettings->get(SEARCH_METHOD_KEY).toBool()) { - m_searchSettings->set(SEARCH_METHOD_KEY, true); - } else { - m_fi->addIndexPath(path, blockList); - m_iw->addIndexPath(path, blockList); - } -} - -void FileIndexManager::handleRemovePathAppend(const QString path) -{ - qDebug() << "Remove index path:" << path; - if(m_searchSettings->get(SEARCH_METHOD_KEY).toBool()) { - m_iw->removeIndexPath(path, true); - } else { - m_iw->removeIndexPath(path, false); - } -} diff --git 
a/libsearch/index/file-index-manager.h b/libsearch/index/file-index-manager.h deleted file mode 100644 index c961dfe..0000000 --- a/libsearch/index/file-index-manager.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef SEARCHMETHODMANAGER_H -#define SEARCHMETHODMANAGER_H - -#include -#include -#include -#include "first-index.h" -//#include "inotify-index.h" -#include "inotify-watch.h" -namespace UkuiSearch { -class FileIndexManager : public QObject { - Q_OBJECT -public: - static FileIndexManager *getInstance(); - void searchMethod(FileUtils::SearchMethod sm); - void initIndexPathSetFunction(); -private Q_SLOTS: - void handleIndexPathAppend(const QString path, const QStringList blockList); - void handleRemovePathAppend(const QString path); -private: - FileIndexManager(QObject *parent = nullptr); - FirstIndex *m_fi; -// InotifyIndex* m_ii; - InotifyWatch *m_iw = nullptr; - QSystemSemaphore m_semaphore; - QGSettings *m_searchSettings = nullptr; -}; -} - -#endif // SEARCHMETHODMANAGER_H diff --git a/libsearch/index/file-indexer-config.cpp b/libsearch/index/file-indexer-config.cpp new file mode 100644 index 0000000..6db2a01 --- /dev/null +++ b/libsearch/index/file-indexer-config.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#include "file-indexer-config.h"\ + +#include +#include +#include +#define INDEX_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf" + +static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings"); +static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable"); +static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable"); +static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable"); +static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable"); + +static std::once_flag flag; +static FileIndexerConfig *global_intance = nullptr; + +FileIndexerConfig *FileIndexerConfig::getInstance() +{ + std::call_once(flag, [ & ] { + global_intance = new FileIndexerConfig(); + }); + return global_intance; +} + +FileIndexerConfig::FileIndexerConfig(QObject *parent) \ + : QObject(parent), + m_dirWatcher(DirWatcher::getDirWatcher()) +{ + connect(m_dirWatcher, &DirWatcher::appendIndexItem, this, &FileIndexerConfig::appendIndexDir); + connect(m_dirWatcher, &DirWatcher::removeIndexItem, this, &FileIndexerConfig::removeIndexDir); + + const QByteArray id(UKUI_SEARCH_SCHEMAS); + if(QGSettings::isSchemaInstalled(id)) { + m_gsettings = new QGSettings(id, QByteArray(), this); + connect(m_gsettings, &QGSettings::changed, this, [ = ](const QString &key) { + if(key == FILE_INDEX_ENABLE_KEY) { + Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool()); + } + }); + } else { + qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!"; + } + + m_settings = new QSettings(INDEX_SETTINGS, QSettings::IniFormat, this); + +} + +FileIndexerConfig::~FileIndexerConfig() +{ +} + +QStringList FileIndexerConfig::currentIndexableDir() +{ + return DirWatcher::getDirWatcher()->currentIndexableDir(); +} + +QStringList FileIndexerConfig::currentBlackListOfIndex() +{ + return 
DirWatcher::getDirWatcher()->currentBlackListOfIndex(); +} + +bool FileIndexerConfig::isFileIndexEnable() +{ + if(m_gsettings) { + if(m_gsettings->keys().contains(FILE_INDEX_ENABLE_KEY)) { + return m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool(); + } else { + qWarning() << "FileIndexerConfig: Can not find key:" << FILE_INDEX_ENABLE_KEY << "in" << UKUI_SEARCH_SCHEMAS; + return false; + } + } else { + qWarning() << "FileIndexerConfig:" << UKUI_SEARCH_SCHEMAS << " is not found!"; + return false; + } +} + +bool FileIndexerConfig::isContentIndexEnable() +{ + return m_settings->value(CONTENT_INDEX_ENABLE_KEY, true).toBool(); +} + +bool FileIndexerConfig::isOCREnable() +{ + return m_settings->value(OCR_ENABLE_KEY, true).toBool(); +} + +bool FileIndexerConfig::isMetaDataIndexEnable() +{ + return m_settings->value(META_DATA_INDEX_ENABLE_KEY, true).toBool(); +} diff --git a/libsearch/index/file-indexer-config.h b/libsearch/index/file-indexer-config.h new file mode 100644 index 0000000..3ba595c --- /dev/null +++ b/libsearch/index/file-indexer-config.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#ifndef FILEINDEXERCONFIG_H +#define FILEINDEXERCONFIG_H + +#include +#include +#include +#include +#include "dir-watcher.h" + +class FileIndexerConfig : public QObject +{ + Q_OBJECT +public: + static FileIndexerConfig* getInstance(); + /** + * @brief currentIndexableDir + * @return 当前可索引的所有目录 + */ + QStringList currentIndexableDir(); + /** + * @brief currentBlackListOfIndex + * @return 当前索引黑名单 + */ + QStringList currentBlackListOfIndex(); + /** + * @brief isFileIndexEnable + * @return 是否启动基本索引(只索引文件基本数据,包含路径等内容) + */ + bool isFileIndexEnable(); + /** + * @brief isContentIndexEnable + * @return 是否启动文本内容索引 + */ + bool isContentIndexEnable(); + /** + * @brief isOCREnable + * @return 是否激活OCR功能(文件内容索引) + */ + bool isOCREnable(); + /** + * @brief isMetaDataIndexEnable + * @return 是否激活元数据索引 + */ + bool isMetaDataIndexEnable(); + +Q_SIGNALS: + /** + * @brief appendIndexDir + * 索引目录增加时发送,参数为增加的目录和其下的黑名单 + */ + void appendIndexDir(const QString&, const QStringList&); + /** + * @brief removeIndexDir + * 移除索引目录时发送 + */ + void removeIndexDir(const QString&); + /** + * @brief fileIndexEnableStatusChanged + * 文件索引开关(基本索引) + */ + void fileIndexEnableStatusChanged(bool); + +private: + explicit FileIndexerConfig(QObject *parent = nullptr); + ~FileIndexerConfig(); + + DirWatcher *m_dirWatcher = nullptr; + QGSettings *m_gsettings = nullptr; + QSettings *m_settings = nullptr; + QAtomicInt m_stop; + +}; + +#endif // FILEINDEXERCONFIG_H diff --git a/libsearch/index/file-reader.cpp b/libsearch/index/file-reader.cpp index 401e6fd..f462efe 100644 --- a/libsearch/index/file-reader.cpp +++ b/libsearch/index/file-reader.cpp @@ -22,13 +22,12 @@ #include "binary-parser.h" #include "ocrobject.h" using namespace UkuiSearch; -FileReader::FileReader(QObject *parent) : QObject(parent) { +FileReader::FileReader(QObject *parent) : QObject(parent) +{ } -void FileReader::getTextContent(QString path, QString &textContent, QString &suffix) { - QFileInfo file(path); - suffix 
= file.suffix(); - +void FileReader::getTextContent(const QString &path, QString &textContent, const QString &suffix) +{ if (suffix == "docx") { FileUtils::getDocxTextContent(path, textContent); } else if (suffix == "pptx") { diff --git a/libsearch/index/file-reader.h b/libsearch/index/file-reader.h index 373ec00..6514446 100644 --- a/libsearch/index/file-reader.h +++ b/libsearch/index/file-reader.h @@ -28,7 +28,7 @@ class FileReader : public QObject { public: explicit FileReader(QObject *parent = nullptr); ~FileReader() = default; - static void getTextContent(QString path, QString &textContent, QString &suffix); + static void getTextContent(const QString &path, QString &textContent, const QString &suffix); }; } diff --git a/libsearch/index/file-search-plugin.cpp b/libsearch/index/file-search-plugin.cpp index e03d6b9..ee3b305 100644 --- a/libsearch/index/file-search-plugin.cpp +++ b/libsearch/index/file-search-plugin.cpp @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ #include "file-search-plugin.h" #include "search-manager.h" #include diff --git a/libsearch/index/file-search-plugin.h b/libsearch/index/file-search-plugin.h index 762e6b8..354a8e2 100644 --- a/libsearch/index/file-search-plugin.h +++ b/libsearch/index/file-search-plugin.h @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ #ifndef FILESEARCHPLUGIN_H #define FILESEARCHPLUGIN_H diff --git a/libsearch/index/file-searcher.cpp b/libsearch/index/file-searcher.cpp deleted file mode 100644 index 6619ee0..0000000 --- a/libsearch/index/file-searcher.cpp +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Authors: zhangpengfei - * - */ -#include -#include -#include -#include -#include -#include -#include "file-searcher.h" -#include "global-settings.h" - -size_t FileSearcher::uniqueSymbol1 = 0; -size_t FileSearcher::uniqueSymbol2 = 0; -size_t FileSearcher::uniqueSymbol3 = 0; -QMutex FileSearcher::m_mutex1; -QMutex FileSearcher::m_mutex2; -QMutex FileSearcher::m_mutex3; -FileSearcher::FileSearcher(QObject *parent) : QObject(parent) { -} - -FileSearcher::~FileSearcher() { -} - -int FileSearcher::getCurrentIndexCount() { - try { - Xapian::Database db(INDEX_PATH); - return db.get_doccount(); - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - return 0; - } -} - -void FileSearcher::onKeywordSearch(QString keyword, QQueue *searchResultFile, QQueue *searchResultDir, QQueue> *searchResultContent) { - m_mutex1.lock(); - ++uniqueSymbol1; - m_mutex1.unlock(); - m_mutex2.lock(); - ++uniqueSymbol2; - m_mutex2.unlock(); - m_mutex3.lock(); - ++uniqueSymbol3; - m_mutex3.unlock(); - - m_search_result_file = searchResultFile; - m_search_result_dir = searchResultDir; - m_search_result_content = searchResultContent; - - //file - QtConcurrent::run([&, uniqueSymbol1, keyword]() { - if(!m_search_result_file->isEmpty()) - m_search_result_file->clear(); - int begin = 0; - int num = 5; - int resultCount = 0; - int total = 0; - while(total < 100) { - resultCount = keywordSearchfile(uniqueSymbol1, keyword, "0", 1, begin, num); - if(resultCount == 0 || resultCount == -1) - break; - total += resultCount; - begin += num; - } - return; - }); - // Q_EMIT this->resultFile(m_search_result_file); - //dir - QtConcurrent::run([&, uniqueSymbol2, keyword]() { - if(!m_search_result_dir->isEmpty()) - m_search_result_dir->clear(); - int begin = 0; - int num = 5; - int resultCount = 0; - int total = 0; - while(total < 100) { - resultCount = keywordSearchfile(uniqueSymbol2, keyword, "1", 1, begin, num); - if(resultCount == 0 || resultCount == -1) - break; - 
total += resultCount; - begin += num; - } - return; - }); - // Q_EMIT this->resultDir(m_search_result_dir); - //content - QtConcurrent::run([&, uniqueSymbol3, keyword]() { - if(!m_search_result_content->isEmpty()) - m_search_result_content->clear(); - int begin = 0; - int num = 5; - int resultCount = 0; - int total = 0; - - while(total < 50) { - resultCount = keywordSearchContent(uniqueSymbol3, keyword, begin, num); - if(resultCount == 0 || resultCount == -1) - break; - total += resultCount; - begin += num; - } - return; - }); - // Q_EMIT this->resultContent(m_search_result_content); -} - -int FileSearcher::keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) { - try { - qDebug() << "--keywordSearchfile start--"; - Xapian::Database db(INDEX_PATH); - Xapian::Query query = creatQueryForFileSearch(keyword, db); - Xapian::Enquire enquire(db); - - Xapian::Query queryFile; - if(!value.isEmpty()) { - std::string slotValue = value.toStdString(); - Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, slotValue, slotValue); - queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue); - } else { - queryFile = query; - } - - qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description()); - - enquire.set_query(queryFile); - Xapian::MSet result = enquire.get_mset(begin, num); - int resultCount = result.size(); - qDebug() << "keywordSearchfile results count=" << resultCount; - if(result.size() == 0) - return 0; - if(getResult(uniqueSymbol, result, value) == -1) - return -1; - - qDebug() << "--keywordSearchfile finish--"; - return resultCount; - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - qDebug() << "--keywordSearchfile finish--"; - return -1; - } -} - -int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin, int num) { - try { - qDebug() << "--keywordSearchContent search start--"; - - 
Xapian::Database db(CONTENT_INDEX_PATH); - Xapian::Enquire enquire(db); - Xapian::QueryParser qp; - qp.set_default_op(Xapian::Query::OP_AND); - qp.set_database(db); - - QVector sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword); - //Creat a query - std::string words; - for(int i = 0; i < sKeyWord.size(); i++) { - words.append(sKeyWord.at(i).word).append(" "); - } - Xapian::Query query = qp.parse_query(words); - - // std::vector v; - // for(int i=0;i v; - for(int i = 0; i < userInput.size(); i++) { - v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString())); - // qDebug()<append(QString::fromStdString(data)); - qDebug() << path << "is not exist!!"; - } else { - switch(value.toInt()) { - case 1: - m_mutex1.lock(); - if(uniqueSymbol == FileSearcher::uniqueSymbol1) { - m_search_result_dir->enqueue(path); - m_mutex1.unlock(); - } else { - m_mutex1.unlock(); - return -1; - } - - break; - case 0: - m_mutex2.lock(); - if(uniqueSymbol == FileSearcher::uniqueSymbol2) { - m_search_result_file->enqueue(path); - m_mutex2.unlock(); - } else { - m_mutex2.unlock(); - return -1; - } - break; - default: - break; - } - // searchResult.append(path); - } - qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent; - } - // if(!pathTobeDelete->isEmpty()) - // deleteAllIndex(pathTobeDelete) - return 0; -} - -int FileSearcher::getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord) { - //QStringList *pathTobeDelete = new QStringList; - //Delete those path doc which is not already exist. 
- - QString wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0); - int size = wordTobeFound.size(); - int totalSize = QString::fromStdString(keyWord).size(); - if(totalSize < 5) - totalSize = 5; - // QMap searchResult; - - for(auto it = result.begin(); it != result.end(); ++it) { - Xapian::Document doc = it.get_document(); - std::string data = doc.get_data(); - double docScoreWeight = it.get_weight(); - Xapian::percent docScorePercent = it.get_percent(); - QString path = QString::fromStdString(doc.get_value(1)); - - if(isBlocked(path)) - continue; - - QFileInfo info(path); - - if(!info.exists()) { - // pathTobeDelete->append(QString::fromStdString(data)); - qDebug() << path << "is not exist!!"; - continue; - } - // Construct snippets containing keyword. - QStringList snippets; - auto term = doc.termlist_begin(); - term.skip_to(wordTobeFound.toStdString()); - int count = 0; - for(auto pos = term.positionlist_begin(); pos != term.positionlist_end() && count < 6; ++pos) { - QByteArray snippetByte = QByteArray::fromStdString(data); - QString snippet = "..." 
+ QString(snippetByte.left(*pos)).right(size + totalSize) + QString(snippetByte.mid(*pos, -1)).left(size + totalSize) + "..."; - // qDebug()<enqueue(qMakePair(path, snippets)); - m_mutex3.unlock(); - } else { - m_mutex3.unlock(); - return -1; - } - // searchResult.insert(path,snippets); - qDebug() << "path=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent; - } - // if(!pathTobeDelete->isEmpty()) - // deleteAllIndex(pathTobeDelete) - return 0; -} - -bool FileSearcher::isBlocked(QString &path) { - QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); - for(QString i : blockList) { - if(path.startsWith(i.prepend("/"))) - return true; - } - return false; - -} diff --git a/libsearch/index/file-searcher.h b/libsearch/index/file-searcher.h deleted file mode 100644 index d8e6979..0000000 --- a/libsearch/index/file-searcher.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Authors: zhangpengfei - * - */ -#ifndef FILESEARCHER_H -#define FILESEARCHER_H - -#include -#include -#include -#include -#include -#include -#include -#include -#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() -#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() - - -class FileSearcher : public QObject { - Q_OBJECT -public: - explicit FileSearcher(QObject *parent = nullptr); - ~FileSearcher(); - - static int getCurrentIndexCount(); - - static size_t uniqueSymbol1; - static size_t uniqueSymbol2; - static size_t uniqueSymbol3; - static QMutex m_mutex1; - static QMutex m_mutex2; - static QMutex m_mutex3; - -public Q_SLOTS: - void onKeywordSearch(QString keyword, QQueue *searchResultFile, QQueue *searchResultDir, QQueue> *searchResultContent); - -Q_SIGNALS: - void resultFile(QQueue *); - void resultDir(QQueue *); - void resultContent(QQueue> *); -private: - int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20); - int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20); - - /** - * @brief FileSearcher::creatQueryForFileSearch - * This part shall be optimized frequently to provide a more stable search function. 
- * @param keyword - * @param db - * @return Xapian::Query - */ - Xapian::Query creatQueryForFileSearch(QString keyword, Xapian::Database &db); - Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db); - - int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value); - int getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord); - - bool isBlocked(QString &path); - - QQueue *m_search_result_file = nullptr; - QQueue *m_search_result_dir = nullptr; - QQueue> *m_search_result_content = nullptr; - bool m_searching = false; -}; - -#endif // FILESEARCHER_H diff --git a/libsearch/index/file-watcher.cpp b/libsearch/index/file-watcher.cpp new file mode 100644 index 0000000..789e215 --- /dev/null +++ b/libsearch/index/file-watcher.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#include "file-watcher.h" +using namespace UkuiSearch; +FileWatcher::FileWatcher(QObject *parent) : QObject(parent), m_config(FileIndexerConfig::getInstance()) +{ + qRegisterMetaType>("QVector"); + m_watcher = new FileSystemWatcher(); + m_pendingFileQUeue = PendingFileQueue::getInstance(); + + connect(m_watcher, &FileSystemWatcher::created, this, &FileWatcher::onFileCreated); + connect(m_watcher, &FileSystemWatcher::modified, this, &FileWatcher::onFileModefied); + connect(m_watcher, &FileSystemWatcher::deleted, this, &FileWatcher::onFileDeletedOrMoved); + connect(m_watcher, &FileSystemWatcher::moved, this, &FileWatcher::onFileDeletedOrMoved); + + connect(m_pendingFileQUeue, &PendingFileQueue::filesUpdate, this, &FileWatcher::filesUpdate); +} + +FileWatcher::~FileWatcher() +{ + if(m_watcher) { + delete m_watcher; + m_watcher = nullptr; + } +} + +void FileWatcher::addWatch(const QString &path, const QStringList &blackList) +{ + m_watcher->addWatchWithBlackList(QStringList(path), blackList); +} + +void FileWatcher::removeWatch(const QString &path, bool updateIndex) +{ + QStringList paths = m_watcher->removeWatch(path); + if(updateIndex) { + for(QString &pathToDelete : paths) { + PendingFile file(pathToDelete); + file.setIsDir(); + file.setDeleted(); + m_pendingFileQUeue->enqueue(file); + } + } +} + +void FileWatcher::installWatches() +{ + m_watcher->addWatchWithBlackList(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex()); + qDebug() << "Add watch ->" << m_config->currentIndexableDir() << "black list" << m_config->currentBlackListOfIndex(); +} + +void FileWatcher::removeWatch() +{ + m_watcher->clearAll(); +} + +void FileWatcher::onFileDeletedOrMoved(const QString &path, bool isDir) +{ + PendingFile file(path); + file.setIsDir(isDir); + file.setDeleted(); + m_pendingFileQUeue->enqueue(file); +} + +void FileWatcher::onFileCreated(const QString &path, bool isDir) +{ + PendingFile file(path); + file.setIsDir(isDir); + 
file.setCreated(); + m_pendingFileQUeue->enqueue(file); +} + +void FileWatcher::onFileModefied(const QString &path) +{ + PendingFile file(path); + file.setModified(); + m_pendingFileQUeue->enqueue(file); +} diff --git a/libsearch/index/file-watcher.h b/libsearch/index/file-watcher.h new file mode 100644 index 0000000..88fefc5 --- /dev/null +++ b/libsearch/index/file-watcher.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#ifndef FILEWATCHER_H +#define FILEWATCHER_H + +#include +#include "file-system-watcher.h" +#include "file-indexer-config.h" +#include "pending-file-queue.h" +namespace UkuiSearch { +/** + * @brief The FileWatcher class + * 文件监听与信号处理控制中心 + */ +class FileWatcher : public QObject +{ + Q_OBJECT +public: + explicit FileWatcher(QObject *parent = nullptr); + ~FileWatcher(); + +public Q_SLOTS: + /** + * @brief addWatch + * 增加监听目录 + * @param 要增加的目录和黑名单 + */ + void addWatch(const QString& path, const QStringList& blackList); + void removeWatch(const QString& path, bool updateIndex = true); + + /** + * @brief installWatches + * 安装监听 + */ + void installWatches(); + /** + * @brief removeWatch + * 移除所有监听 + */ + void removeWatch(); + +Q_SIGNALS: + void filesUpdate(const QVector&); + void installedWatches(); + +private: + void onFileCreated(const QString& path, bool isDir); + void onFileModefied(const QString& path); + void onFileDeletedOrMoved(const QString& path, bool isDir); + FileSystemWatcher *m_watcher = nullptr; + FileIndexerConfig *m_config = nullptr; + PendingFileQueue *m_pendingFileQUeue = nullptr; + +}; +} +#endif // FILEWATCHER_H diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp deleted file mode 100644 index 1a938a6..0000000 --- a/libsearch/index/first-index.cpp +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Authors: zhangzihao - * Modified by: zhangpengfei - * - */ -//#include -#include "first-index.h" -#include "dir-watcher.h" -#include -/*需要重构: - *支持新建或重建指定目录索引 - *支持判断所有数据库状态,根据状态判断是否单独重建某个数据库。 - *支持自定义增加索引目录。 - */ - -using namespace UkuiSearch; -FirstIndex *FirstIndex::m_instance = nullptr; -std::once_flag g_firstIndexInstanceFlag; -FirstIndex::FirstIndex() : m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Open) -{ - m_pool.setMaxThreadCount(2); - m_pool.setExpiryTimeout(100); - connect(this, &FirstIndex::needRebuild, this, &FirstIndex::rebuildDatebase, Qt::QueuedConnection); -} - -FirstIndex *FirstIndex::getInstance() -{ - std::call_once(g_firstIndexInstanceFlag, [] () { - m_instance = new FirstIndex; - }); - return m_instance; -} - -FirstIndex::~FirstIndex() { - qDebug() << "~FirstIndex"; - if(this->m_indexData) - delete this->m_indexData; - this->m_indexData = nullptr; - if(this->m_contentIndexData) - delete this->m_contentIndexData; - this->m_contentIndexData = nullptr; - if(this->m_ocrIndexData) - delete this->m_ocrIndexData; - this->m_ocrIndexData = nullptr; - qDebug() << "~FirstIndex end"; -} - -void FirstIndex::work(const QFileInfo& fileInfo) { - // qDebug() << "there are some shit here"<m_indexData->enqueue(QVector() << fileInfo.fileName() - << fileInfo.absoluteFilePath() - << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? 
"1" : "0") - << fileInfo.lastModified().toString("yyyyMMddHHmmss")); - if (fileInfo.fileName().split(".", QString::SkipEmptyParts).length() < 2) - return; - if (true == targetFileTypeMap[fileInfo.fileName().split(".").last()] - and false == FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath())) { - if (fileInfo.fileName().split(".").last() == "docx") { - QuaZip file(fileInfo.absoluteFilePath()); - if(!file.open(QuaZip::mdUnzip)) - return; - if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) - return; - QuaZipFile fileR(&file); - this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小 - file.close(); - } else if (fileInfo.fileName().split(".").last() == "pptx") { - QuaZip file(fileInfo.absoluteFilePath()); - if(!file.open(QuaZip::mdUnzip)) - return; - QString prefix("ppt/slides/slide"); - qint64 fileSize(0); - qint64 fileIndex(0); - for(QString i : file.getFileNameList()) { - if(i.startsWith(prefix)){ - QString name = prefix + QString::number(fileIndex + 1) + ".xml"; - fileIndex++; - if(!file.setCurrentFile(name)) { - continue; - } - QuaZipFile fileR(&file); - fileSize += fileR.usize(); - } - } - file.close(); - this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小 - } else if (fileInfo.fileName().split(".").last() == "xlsx") { - QuaZip file(fileInfo.absoluteFilePath()); - if(!file.open(QuaZip::mdUnzip)) - return; - if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) - return; - QuaZipFile fileR(&file); - this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小 - file.close(); - } else { - this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); - } - } else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) { - if (FileUtils::isOcrSupportSize(fileInfo.absoluteFilePath())) { - 
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); - //this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); - } - } -} - -void FirstIndex::rebuildDatebase() -{ - m_semaphore.acquire(); - m_isRebuildProcess = true; - this->wait(); - this->start(); -} - -void FirstIndex::addIndexPath(const QString path, const QStringList blockList) -{ - m_semaphore.acquire(); - m_isRebuildProcess = false; - setPath(QStringList() << path); - setBlockPath(blockList); - this->wait(); - this->start(); -} - -void FirstIndex::run() { - QTime t1 = QTime::currentTime(); - QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString(); - QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString(); - // QString ocrIndexDatabaseStatus = IndexStatusRecorder::getInstance()->getStatus(OCR_DATABASE_STATE).toString(); - QString inotifyIndexStatus = IndexStatusRecorder::getInstance()->getStatus(INOTIFY_NORMAL_EXIT).toString(); - - qInfo() << "indexDataBaseStatus: " << indexDataBaseStatus; - qInfo() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus; - // qInfo() << "ocrIndexDatabaseStatus: " << ocrIndexDatabaseStatus; - qInfo() << "inotifyIndexStatus: " << inotifyIndexStatus; - - m_inotifyIndexStatus = inotifyIndexStatus == "2" ? true : false; - m_indexDatabaseStatus = indexDataBaseStatus == "2" ? true : false; - m_contentIndexDatabaseStatus = contentIndexDataBaseStatus == "2" ? true : false; - // m_ocrIndexDatabaseStatus = ocrIndexDatabaseStatus == "2" ? 
true : false; - - if(m_inotifyIndexStatus && m_indexDatabaseStatus && m_contentIndexDatabaseStatus /*&& m_ocrIndexDatabaseStatus*/) { - m_needRebuild = false; - if(m_isRebuildProcess) { - m_isRebuildProcess = false; - m_semaphore.release(1); - return; - } - } else { - if(m_isRebuildProcess) { - setPath(DirWatcher::getDirWatcher()->currentIndexableDir()); - setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex()); - } else { - if(m_inotifyIndexStatus && (!m_indexDatabaseStatus || !m_contentIndexDatabaseStatus)) { - m_needRebuild = true; - } - if(!m_inotifyIndexStatus || (!m_indexDatabaseStatus && !m_contentIndexDatabaseStatus)) { - m_needRebuild = false; - qInfo() << "Entering rebuild procedure"; - Q_EMIT needRebuild(); - m_semaphore.release(1); - return; - } - } - } - - - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0"); - - this->m_indexData = new QQueue>(); - this->m_contentIndexData = new QQueue>(); -// this->m_ocrIndexData = new QQueue>(); - - ++FileUtils::indexStatus; - pid_t pid; - pid = fork(); - if(pid == 0) { - prctl(PR_SET_PDEATHSIG, SIGTERM); - prctl(PR_SET_NAME, "first-index"); - - QSemaphore sem(5); - QMutex mutex1, mutex2, mutex3; - mutex1.lock(); - mutex2.lock(); - // mutex3.lock(); - - //FIXME:在子进程里使用和父进程同样的dbus接口会出问题。 -// qInfo() << "index dir" << DirWatcher::getDirWatcher()->currentIndexableDir(); -// qInfo() << "index block dir" << DirWatcher::getDirWatcher()->currentBlackListOfIndex(); - qInfo() << "index dir" << m_pathList; - qInfo() << "index block dir" << m_blockList; - this->Traverse(); - - FileUtils::maxIndexCount = this->m_indexData->length(); - qDebug() << "max_index_count:" << FileUtils::maxIndexCount; - QtConcurrent::run(&m_pool, [&]() { - sem.acquire(2); - mutex1.unlock(); - if(m_isRebuildProcess && m_inotifyIndexStatus && m_indexDatabaseStatus) { //重建索引且无异常 - sem.release(2); - return; - } else if(m_isRebuildProcess) { //重建索引且有异常 - IndexGenerator::getInstance()->rebuildIndexDatabase(); - } else 
if(!m_inotifyIndexStatus || !m_indexDatabaseStatus) { //添加目录且有异常 - qWarning() << "Index database need rebuild!"; - sem.release(2); - return; - } - qDebug() << "index start;" << m_indexData->size(); - - QQueue>* tmp1 = new QQueue>(); - bool sucess = true; - while(!this->m_indexData->empty()) { - for(size_t i = 0; (i < 8192) && (!this->m_indexData->empty()); ++i) { - tmp1->enqueue(this->m_indexData->dequeue()); - } - if(!IndexGenerator::getInstance()->creatAllIndex(tmp1)) { - sucess = false; - break; - } - tmp1->clear(); - } - delete tmp1; - qDebug() << "index end;"; - if(sucess) { - IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "2"); - } - sem.release(2); - }); - QtConcurrent::run(&m_pool,[&]() { - sem.acquire(2); - mutex2.unlock(); - if(m_isRebuildProcess && m_inotifyIndexStatus && m_contentIndexDatabaseStatus) { - sem.release(2); - return; - } else if(m_isRebuildProcess) { //重建索引且有异常 - IndexGenerator::getInstance()->rebuildContentIndexDatabase(); - } else if(!m_inotifyIndexStatus || !m_contentIndexDatabaseStatus) { //添加目录且有异常 - qWarning() << "Content index database need rebuild!"; - sem.release(2); - return; - } - qDebug() << "content index start:" << m_contentIndexData->size(); - QQueue* tmp2 = new QQueue(); - bool sucess = true; - while(!this->m_contentIndexData->empty()) { - qint64 fileSize = 0; - //修改一次处理的数据量,从30个文件改为文件总大小为50M以下,50M为暂定值--jxx20210519 - for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_contentIndexData->empty()); ++i) { - QPair tempPair = this->m_contentIndexData->dequeue(); - fileSize += tempPair.second; - if (fileSize > 52428800 ) { - if (tmp2->size() == 0) { - tmp2->enqueue(tempPair.first); - break; - } - this->m_contentIndexData->enqueue(tempPair); - break; - } - tmp2->enqueue(tempPair.first); - } - if(!IndexGenerator::getInstance()->creatAllIndex(tmp2)) { - sucess = false; - break; - } - tmp2->clear(); - } - delete tmp2; - qDebug() << "content index end;"; - if(sucess) { - 
IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "2"); - } - sem.release(2); - }); - // OCR功能目前合到内容搜索分类中 -// QtConcurrent::run(&m_pool,[&]() { -// sem.acquire(5); -// mutex3.unlock(); -// QQueue* tmpOcr = new QQueue(); -// qDebug() << "m_ocr_index:" << m_ocr_index->size(); -// if(m_isFirstIndex && m_allDatadaseStatus && m_contentIndexDatabaseStatus) { -// sem.release(2); -// return; -// } -// IndexGenerator::getInstance()->rebuildOcrIndexDatabase(); -// bool sucess = true; -// while(!this->m_ocr_index->empty()) { -// qint64 fileSize = 0; -// //一次处理的数据量文件总大小为50M以下,50M为暂定值 -// for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_ocr_index->empty()); ++i) { -// QPair tempPair = this->m_ocr_index->dequeue(); -// fileSize += tempPair.second; -// if (fileSize > 52428800) { -// if (tmpOcr->size() == 0) { -// tmpOcr->enqueue(tempPair.first); -// break; -// } -// this->m_ocr_index->enqueue(tempPair); -// break; -// } -// tmpOcr->enqueue(tempPair.first); -// } -// if(!IndexGenerator::getInstance()->creatAllIndex(tmpOcr)) { -// sucess = false; -// break; -// } -// tmpOcr->clear(); -// } -// delete tmpOcr; -// qDebug() << "OCR index end;"; -// if(sucess) { -// IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "2"); -// } -// sem.release(5); -// }); - mutex1.lock(); - mutex2.lock(); - // mutex3.lock(); - sem.acquire(5); - mutex1.unlock(); - mutex2.unlock(); - // mutex3.unlock(); - - if(this->m_indexData) - delete this->m_indexData; - this->m_indexData = nullptr; - if(this->m_contentIndexData) - delete this->m_contentIndexData; - this->m_contentIndexData = nullptr; - if(this->m_ocrIndexData) - delete this->m_ocrIndexData; - this->m_ocrIndexData = nullptr; - ::_exit(0); - } else if(pid < 0) { - qWarning() << "First Index fork error!!"; - } else { - waitpid(pid, NULL, 0); - --FileUtils::indexStatus; - } - - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2"); - if(m_needRebuild) { - m_needRebuild = 
false; - qInfo() << "Entering rebuild procedure"; - Q_EMIT needRebuild(); - } - m_semaphore.release(1); - // int retval1 = write(fifo_fd, buffer, strlen(buffer)); - // if(retval1 == -1) { - // qWarning("write error\n"); - // } - // qDebug("write data ok!\n"); - QTime t2 = QTime::currentTime(); - qWarning() << t1; - qWarning() << t2; - - return; - -} diff --git a/libsearch/index/first-index.h b/libsearch/index/first-index.h deleted file mode 100644 index 9f0cf21..0000000 --- a/libsearch/index/first-index.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * Authors: zhangzihao - * Modified by: zhangpengfei - * - */ -#ifndef FIRSTINDEX_H -#define FIRSTINDEX_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "traverse-bfs.h" -#include "index-status-recorder.h" -#include "index-generator.h" -#include "file-utils.h" -#include "common.h" -namespace UkuiSearch { -class FirstIndex : public QThread, public TraverseBFS -{ - Q_OBJECT -public: - static FirstIndex* getInstance(); - ~FirstIndex(); - virtual void work(const QFileInfo &) final; - void rebuildDatebase(); - void addIndexPath(const QString path, const QStringList blockList); -Q_SIGNALS: - void needRebuild(); -protected: - void run() override; -private: - FirstIndex(); - FirstIndex(const FirstIndex&) = delete; - void operator=(const FirstIndex&) = delete; - - static FirstIndex *m_instance; - - bool m_indexDatabaseStatus = false; - bool m_contentIndexDatabaseStatus = false; - bool m_ocrIndexDatabaseStatus = false; - bool m_inotifyIndexStatus = false; - bool m_isRebuildProcess = true; - bool m_needRebuild = false; - QThreadPool m_pool; - - QQueue>* m_indexData = nullptr; -// QQueue* q_content_index; - //修改QQueue存储数据为QPair,增加存储文件大小数据便于处理时统计--jxx20210519 - QQueue>* m_contentIndexData = nullptr; - //新增ocr队列存储ocr可识别处理的图片信息及大小; - QQueue>* m_ocrIndexData = nullptr; - //xapian will auto commit per 10,000 changes, donnot change it!!! - const size_t u_send_length = 8192; - QSystemSemaphore m_semaphore; - -}; -} - -#endif // FIRSTINDEX_H diff --git a/libsearch/index/first-run-indexer.cpp b/libsearch/index/first-run-indexer.cpp new file mode 100644 index 0000000..35d5ce2 --- /dev/null +++ b/libsearch/index/first-run-indexer.cpp @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#include "first-run-indexer.h" +#include +#include +#include + +#include "file-utils.h" +#include "basic-indexer.h" +#include "file-indexer-config.h" +#include "file-content-indexer.h" +#include "writable-database.h" +using namespace UkuiSearch; +FirstRunIndexer::FirstRunIndexer(const QStringList &folders, const QStringList &blackList, QAtomicInt& stop, WorkModes mode, Targets target) + : m_folders(folders), + m_blackList(blackList), + m_stop(&stop), + m_mode(mode), + m_target(target) +{ +} + +FirstRunIndexer::~FirstRunIndexer() +{ +} + +void FirstRunIndexer::run() +{ + QTime t = QTime::currentTime(); + if(m_target == Target::None) { + return; + } + fetch(); + + if(m_target & Target::Basic) { + basicIndex(); + } + if(m_target & Target::Content) { + contentIndex(); + } + m_cache.clear(); + malloc_trim(0); + qDebug() << "FirstRunIndexer: time :" << t.elapsed(); + Q_EMIT done(); +} + +void FirstRunIndexer::fetch() +{ + qDebug() << "Now begin fetching files to be indexed..."; + qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList; + QQueue bfs; + for(QString blockPath : m_blackList) { + for(QString path : m_folders) { + if(FileUtils::isOrUnder(path, blockPath)) { + m_folders.removeOne(path); + } + } + } + m_cache.append(m_folders); + for(QString path : m_folders) { + bfs.enqueue(path); + } + 
QFileInfoList list; + QDir dir; + QStringList tmpList = m_blackList; + // QDir::Hidden + dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); + dir.setSorting(QDir::DirsFirst); + while(!bfs.empty()) { + dir.setPath(bfs.dequeue()); + list = dir.entryInfoList(); + for(auto i : list) { + bool isBlocked = false; + for(QString path : tmpList) { + if(i.absoluteFilePath() == path) { + isBlocked = true; + tmpList.removeOne(path); + break; + } + } + if(isBlocked) + continue; + + if(i.isDir() && (!(i.isSymLink()))) { + bfs.enqueue(i.absoluteFilePath()); + } + m_cache.append(i.absoluteFilePath()); + } + } + qDebug() << m_cache.size() << "files founded, start index..."; +} + +void FirstRunIndexer::basicIndex() +{ + qDebug() << "Begin basic index"; + WritableDatabase basicDb(DataBaseType::Basic); + if(!basicDb.open()) { + qWarning() << "Basic db open failed, fail to run basic index!"; + return; + } + QStringList filesNeedIndex; + if(m_mode == WorkMode::Rebuild) { + basicDb.rebuild(); + if(!basicDb.open()) { + qWarning() << "basicDb db open failed, fail to run basic index!"; + return; + } + filesNeedIndex = m_cache; + qDebug() < indexTimes = basicDb.getIndexTimes(); + qDebug() << indexTimes.size() << "documents recorded"; + for(const QString& path : m_cache) { + info.setFile(path); + if(indexTimes.take(FileUtils::makeDocUterm(path)) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) { + filesNeedIndex.append(path); + } + } + if(!indexTimes.isEmpty()) { + qDebug() << indexTimes.size() << "documents need remove."; + for(std::string uniqueTerm : indexTimes.keys()) { + basicDb.removeDocument(uniqueTerm); + basicDb.setMetaData(uniqueTerm, ""); + } + basicDb.commit(); + } + qDebug() << filesNeedIndex.size() << "files need update."; + } + uint allSize = filesNeedIndex.size(); + Q_EMIT progress(IndexType::Basic, allSize, 0); + uint batchSize = 0; + uint finishNum = 0; + for (const QString& path: filesNeedIndex) { + BasicIndexer indexer(path); + 
if(indexer.index()) { + basicDb.addDocument(indexer.document()); + ++batchSize; + ++finishNum; + } + if(batchSize >= 8192) { + qDebug() << "8192 finished."; + basicDb.commit(); + Q_EMIT progress(IndexType::Basic, allSize, finishNum); + batchSize = 0; + } + } + //TODO:xapian默认10000条自动commit一次,需要根据内存占用情况调整。 + basicDb.commit(); + Q_EMIT progress(IndexType::Basic, allSize, finishNum); + Q_EMIT basicIndexDone(finishNum); + filesNeedIndex.clear(); + qDebug() << "Finish basic index"; +} + +void FirstRunIndexer::contentIndex() +{ + qDebug() << "Begin content index"; + if(m_stop->load()) { + qDebug() << "Index stopped, abort content index."; + return; + } + WritableDatabase contentDb(DataBaseType::Content); + if(!contentDb.open()) { + qWarning() << "Content db open failed, fail to run content index!"; + return; + } + QStringList filesNeedIndex; + QStringList filesNeedOCRIndex; + QMap suffixMap = targetFileTypeMap; + QFileInfo info; + // ocr +// bool ocrEnable = FileIndexerConfig::getInstance()->isOCREnable(); + if(FileIndexerConfig::getInstance()->isOCREnable()) { + qDebug() << "OCR enabled."; + suffixMap.unite(targetPhotographTypeMap); + } + if(m_mode == WorkMode::Rebuild) { + contentDb.rebuild(); + if(!contentDb.open()) { + return; + } + } + if(m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) { + for(QString path : m_cache) { + info.setFile(path); + if(true == suffixMap[info.suffix()] && info.isFile()) { + if(!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) { + filesNeedIndex.append(path); + } + } + } + } else if(m_mode == WorkMode::Update) { + QMap indexTimes = contentDb.getIndexTimes(); + qDebug() << indexTimes.size() << "documents recorded"; + for(QString path : m_cache) { + info.setFile(path); + if(true == suffixMap[info.suffix()] && info.isFile()) { + std::string uterm = FileUtils::makeDocUterm(path); + if(indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) { + if(!FileUtils::isEncrypedOrUnsupport(path, 
info.suffix())) { + filesNeedIndex.append(path); + indexTimes.remove(uterm); + } + } else { + indexTimes.remove(uterm); + } + } + } + if(!indexTimes.isEmpty()) { + qDebug() << indexTimes.size() << "documents need remove"; + for(std::string uniqueTerm : indexTimes.keys()) { + contentDb.removeDocument(uniqueTerm); + contentDb.setMetaData(uniqueTerm, ""); + } + contentDb.commit(); + } + } + + uint allSize = filesNeedIndex.size(); + qDebug() << allSize << "files need content index."; + Q_EMIT progress(IndexType::Contents, allSize, 0); + + uint batchSize = 0; + uint finishNum = 0; + for (QString path : filesNeedIndex) { + info.setFile(path); + if(true == targetPhotographTypeMap[info.suffix()]) { + filesNeedOCRIndex.append(path); + filesNeedIndex.removeOne(path); + continue; + } + fileContentIndexer indexer(path); + if(indexer.index()) { + contentDb.addDocument(indexer.document()); + ++batchSize; + ++finishNum; + } else { + qDebug() << "Extract fail===" << path; + } + if(batchSize >= 30) { + contentDb.commit(); + qDebug() << "30 finished."; + Q_EMIT progress(IndexType::Contents, allSize, finishNum); + batchSize = 0; + } + if(m_stop->load()) { + qDebug() << "Index stopped, interrupt content index."; + break; + } + } + contentDb.commit(); + Q_EMIT progress(IndexType::Contents, allSize, finishNum); + + filesNeedIndex.clear(); + qDebug() << "Content index for normal files finished, now begin OCR index"; + int ocrSize = filesNeedOCRIndex.size(); + qDebug() << ocrSize << "pictures need OCR index."; + + batchSize = 0; + int ocrFinishNum = 0; + for(QString path : filesNeedOCRIndex) { + fileContentIndexer indexer(path); + if(indexer.index()) { + contentDb.addDocument(indexer.document()); + ++batchSize; + ++ocrFinishNum; + } else { + qDebug() << "Extract fail===" << path; + } + if(batchSize >= 30) { + contentDb.commit(); + qDebug() << "30 finished."; + Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum); + Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum); 
+ batchSize = 0; + } + if(m_stop->load()) { + qDebug() << "Index stopped, interrupt content index."; + break; + } + } + contentDb.commit(); + Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum); + Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum); + filesNeedOCRIndex.clear(); + qDebug() << "Finish OCR index."; + Q_EMIT contentIndexDone(finishNum + ocrFinishNum); + qDebug() << "Finish content index"; +} diff --git a/libsearch/index/first-run-indexer.h b/libsearch/index/first-run-indexer.h new file mode 100644 index 0000000..0d25605 --- /dev/null +++ b/libsearch/index/first-run-indexer.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#ifndef FIRSTRUNINDEXER_H +#define FIRSTRUNINDEXER_H + +#include +#include +#include +#include "common.h" + +namespace UkuiSearch { +class FirstRunIndexer : public QObject, public QRunnable +{ + Q_OBJECT +public: + /** + * @brief The WorkMode enum + * Update 增量更新 + * Add 增加索引目录 + * Rebuild 删除并重建数据库 + */ + enum WorkMode{ + Update = 0, + Add = 1, + Rebuild + }; + Q_DECLARE_FLAGS(WorkModes, WorkMode) + /** + * @brief The Target enum + * 要进行索引的数据库 + * All 所有数据库 + * Basic 基础索引数据库 + * Content 内容索引数据库 + */ + enum Target{ + None = 0, + Basic = 1u << 0, + Content = 1u << 1, + All = Basic | Content + }; + Q_DECLARE_FLAGS(Targets, Target) + + FirstRunIndexer(const QStringList& folders, const QStringList& blackList, QAtomicInt& stop, WorkModes mode = WorkMode::Update, Targets target = Target::All); + ~FirstRunIndexer(); + void run() override; + +Q_SIGNALS: + void progress(IndexType type, uint all, uint finished); + void basicIndexDone(int size); + void contentIndexDone(int size); + void done(); + +private: + void fetch(); + void basicIndex(); + void contentIndex(); + + WorkModes m_mode; + Targets m_target; + QStringList m_folders; + QStringList m_blackList; + QStringList m_cache; + QAtomicInt *m_stop = nullptr; +}; +Q_DECLARE_OPERATORS_FOR_FLAGS(FirstRunIndexer::Targets) +} +#endif // FIRSTRUNINDEXER_H diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp deleted file mode 100644 index 6d98da4..0000000 --- a/libsearch/index/index-generator.cpp +++ /dev/null @@ -1,669 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Authors: zhangpengfei - * Modified by: zhangzihao - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "file-utils.h" -#include "index-generator.h" -#include "chinese-segmentation.h" - -using namespace UkuiSearch; - -static IndexGenerator *global_instance = nullptr; -QMutex IndexGenerator::m_mutex; -//QVector *UkuiSearch::g_docListForPath; -//QMutex UkuiSearch::g_mutexDocListForPath; -//QVector *UkuiSearch::g_docListForContent; -//QMutex UkuiSearch::g_mutexDocListForContent; -QMutex IndexGenerator::g_mutexDocListForPath; -QMutex IndexGenerator::g_mutexDocListForContent; -QMutex IndexGenerator::g_mutexDocListForOcr; -QVector IndexGenerator::g_docListForPath = QVector(); -QVector IndexGenerator::g_docListForContent = QVector(); -QVector IndexGenerator::g_docListForOcr = QVector(); - - -IndexGenerator *IndexGenerator::getInstance() { - QMutexLocker locker(&m_mutex); - if(!global_instance) { - global_instance = new IndexGenerator(); - } - return global_instance; -} - -//文件名索引 -bool IndexGenerator::creatAllIndex(QQueue > *messageList) { - HandlePathList(messageList); -// if(g_docListForPath == NULL) { -// return false; -// } - if(IndexGenerator::g_docListForPath.isEmpty()) { - return false; - } - qDebug() << "begin creatAllIndex"; - try { - for(auto i : IndexGenerator::g_docListForPath) { - - insertIntoDatabase(i); - } - m_database_path->commit(); - } catch(const Xapian::Error &e) { - qWarning() << "creatAllIndex fail!" 
<< QString::fromStdString(e.get_description()); - //need a record - IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1"); - return false; - } - qDebug() << "finish creatAllIndex"; - IndexGenerator::g_docListForPath.clear(); - IndexGenerator::g_docListForPath.squeeze(); - QVector().swap(IndexGenerator::g_docListForPath); - -// delete g_docListForPath; -// g_docListForPath = nullptr; - return true; -} -//文件内容索引 -bool IndexGenerator::creatAllIndex(QQueue *messageList) { - HandlePathList(messageList); - qDebug() << "begin creatAllIndex for content"; - if(IndexGenerator::g_docListForContent.isEmpty()) { - return false; - } - int size = IndexGenerator::g_docListForContent.size(); - qDebug() << "begin creatAllIndex for content" << size; - if(!size == 0) { - try { - int count = 0; - for(Document i : IndexGenerator::g_docListForContent) { - if(!i.isRequiredDeleted()) { - m_database_content->replace_document(i.getUniqueTerm(), i.getXapianDocument()); - } else { - m_database_content->delete_document(i.getUniqueTerm()); - } - if(++count > 999) { - count = 0; - m_database_content->commit(); - } - } - m_database_content->commit(); - } catch(const Xapian::Error &e) { - qWarning() << "creat content Index fail!" 
<< QString::fromStdString(e.get_description()); - IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "1"); - return false; - } - qDebug() << "finish creatAllIndex for content"; - - IndexGenerator::g_docListForContent.clear(); - IndexGenerator::g_docListForContent.squeeze(); - QVector().swap(IndexGenerator::g_docListForContent); - malloc_trim(0); - } - Q_EMIT this->transactionFinished(); - return true; - -} - -bool IndexGenerator::creatOcrIndex(QQueue *messageList) -{ - HandleOcrPathList(messageList); - if(IndexGenerator::g_docListForOcr.isEmpty()) { - return false; - } - int size = IndexGenerator::g_docListForOcr.size(); - qDebug() << "begin creatAllIndex for ocr" << size; - if(!size == 0) { - try { - int count = 0; - for(Document i : IndexGenerator::g_docListForOcr) { - if(!i.isRequiredDeleted()) { - m_database_ocr->replace_document(i.getUniqueTerm(), i.getXapianDocument()); - } else { - m_database_ocr->delete_document(i.getUniqueTerm()); - } - if(++count > 999) { - count = 0; - m_database_ocr->commit(); - } - } - m_database_ocr->commit(); - } catch(const Xapian::Error &e) { - qWarning() << "creat ocr Index fail!" 
<< QString::fromStdString(e.get_description()); - IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "1"); - return false; - } - qDebug() << "finish creatAllIndex for ocr"; - - IndexGenerator::g_docListForOcr.clear(); - IndexGenerator::g_docListForOcr.squeeze(); - QVector().swap(IndexGenerator::g_docListForOcr); - malloc_trim(0); - } - return true; -} - -IndexGenerator::IndexGenerator(QObject *parent) : QObject(parent) -{ - QDir database(INDEX_PATH); - if(!database.exists()) { - qDebug() << "create index path" << INDEX_PATH<< database.mkpath(INDEX_PATH); - } - database.setPath(CONTENT_INDEX_PATH); - if(!database.exists()) { - qDebug() << "create content index path" << CONTENT_INDEX_PATH << database.mkpath(CONTENT_INDEX_PATH); - } -// database.setPath(OCR_INDEX_PATH); -// if(!database.exists()) { -// qDebug() << "create ocr index path" << OCR_INDEX_PATH << database.mkpath(OCR_INDEX_PATH); -// } - - try { - m_database_path = new Xapian::WritableDatabase(INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN); - m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN); -// m_database_ocr = new Xapian::WritableDatabase(OCR_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN); - } catch(const Xapian::Error &e) { - qWarning() << "creat Index fail!" 
<< QString::fromStdString(e.get_description()); - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - assert(false); - } -} - -IndexGenerator::~IndexGenerator() { - QMutexLocker locker(&m_mutex); - qDebug() << "~IndexGenerator"; - if(m_database_path) - m_database_path->~WritableDatabase(); -// delete m_database_path; - m_database_path = nullptr; - if(m_database_content) - m_database_content->~WritableDatabase(); -// delete m_database_content; - if(m_database_ocr) - m_database_ocr->~WritableDatabase(); - m_database_path = nullptr; - m_database_content = nullptr; - m_database_ocr = nullptr; - global_instance = nullptr; -// if(m_index_map) -// delete m_index_map; -// m_index_map = nullptr; -// if(mg_docListForPath) -// delete mg_docListForPath; -// mg_docListForPath = nullptr; -// if(mg_docListForContent) -// delete mg_docListForContent; -// mg_docListForContent = nullptr; -// if(m_index_data_path) -// delete m_index_data_path; -// m_index_data_path = nullptr; -// if(m_indexer) -// delete m_indexer; -// m_indexer = nullptr; -// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2"); -// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); -// GlobalSettings::getInstance()->setValue(INDEX_GENERATOR_NORMAL_EXIT, "2"); - - qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId(); - qDebug() << "~IndexGenerator end"; -} - -void IndexGenerator::rebuildIndexDatabase(const QString &path) -{ - QDir database(path); - if(database.exists()) { - qDebug() << "remove" << path << database.removeRecursively(); - } else { - qDebug() << "create index path" << path << database.mkpath(path); - } - if(m_database_path) - m_database_path->~WritableDatabase(); - m_database_path = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN); -} - -void IndexGenerator::rebuildContentIndexDatabase(const QString &path) -{ - QDir database(path); - if(database.exists()) { - qDebug() << "remove" << path << 
database.removeRecursively(); - } else { - qDebug() << "create content index path" << path << database.mkpath(path); - } - if(m_database_content) - m_database_content->~WritableDatabase(); - m_database_content = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN); -} - -void IndexGenerator::rebuildOcrIndexDatabase(const QString &path) -{ - QDir database(path); - if(database.exists()) { - qDebug() << "remove" << path << database.removeRecursively(); - } else { - qDebug() << "create ocr index path" << path << database.mkpath(path); - } - if(m_database_ocr) - m_database_ocr->~WritableDatabase(); - m_database_ocr = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN); -} - -void IndexGenerator::insertIntoDatabase(Document& doc) { -// qDebug()<< "--index start--"; - Xapian::Document document = doc.getXapianDocument(); -// m_indexer.set_document(document); -// qDebug()<replace_document(doc.getUniqueTerm(), document); -// qDebug()<<"replace doc docid="<(innerId); -// qDebug()<< "--index finish--"; - return; -} -//#define fun(a) a=new ;printf() -void IndexGenerator::insertIntoContentDatabase(Document& doc) { - Xapian::docid innerId = m_database_content->replace_document(doc.getUniqueTerm(), doc.getXapianDocument()); -// qDebug()<<"replace doc docid="<(innerId); -// qDebug()<< "--index finish--"; - return; -} - -void IndexGenerator::HandlePathList(QQueue> *messageList) { - qDebug() << "Begin HandlePathList!"; - qDebug() << messageList->size(); -// qDebug()< future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument); - -// future.waitForFinished(); - -// QList docList = future.results(); -// future.cancel(); -// mg_docListForPath = new QList(docList); - QThreadPool pool; - pool.setMaxThreadCount(((QThread::idealThreadCount() - 1) / 2) + 1); - pool.setExpiryTimeout(100); - ConstructDocumentForPath *constructer; - while(!messageList->isEmpty()) { - constructer = new 
ConstructDocumentForPath(messageList->dequeue()); - pool.start(constructer); - } - qDebug() << "pool finish" << pool.waitForDone(-1); -// if(constructer) -// delete constructer; -// constructer = nullptr; - -// qDebug()<size(); -// qWarning() << g_docListForPath; -// QList docList = future.results(); -// mg_docListForPath = new QList(docList); -// mg_docListForPath = std::move(future.results()); -// qDebug()< *messageList) { - qDebug() << "Begin HandlePathList for content index!"; - qDebug() << messageList->size(); -// qDebug()<isEmpty()) { - constructer = new ConstructDocumentForContent(messageList->dequeue()); - pool.start(constructer); - } - qDebug() << "pool finish" << pool.waitForDone(-1); - qDebug() << "Finish HandlePathList for content index!"; - return; -} - -void IndexGenerator::HandleOcrPathList(QQueue *messageList) -{ - qDebug() << "Begin HandlePathList for ocr index!"; - qDebug() << messageList->size(); - ConstructDocumentForOcr *constructer; - QThreadPool pool; - pool.setMaxThreadCount(1); - pool.setExpiryTimeout(100); - while(!messageList->isEmpty()) { - constructer = new ConstructDocumentForOcr(messageList->dequeue()); - pool.start(constructer); - } - qDebug() << "pool finish" << pool.waitForDone(-1); - qDebug() << "Finish HandlePathList for content index!"; - return; -} -//deprecated -Document IndexGenerator::GenerateDocument(const QVector &list) { - Document doc; -// qDebug()< term; - KeyWord skw; - Document doc; - QString uniqueterm; - QString upTerm; - QString suffix; - FileReader::getTextContent(path, content, suffix); - - term = ChineseSegmentation::getInstance()->callSegment(content.toStdString()); -// QStringList term = content.split(""); - - doc.setData(content); - doc.setUniqueTerm(uniqueterm); - doc.addTerm(upTerm); - doc.addValue(1, path); - doc.addValue(2, suffix); - for(int i = 0; i < term.size(); ++i) { - doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); - } - -// Document doc; -// 
doc.setData(content); -// doc.setUniqueTerm(uniqueterm); -// doc.addTerm(upTerm); -// doc.addValue(path); -// int pos = 0; -// for(QString i : term) -// { -// doc.addPosting(i.toStdString(),QVector() << ++pos,1); -// } - - content.clear(); - term.clear(); - return doc; -} - -//deprecated -QStringList IndexGenerator::IndexSearch(QString indexText) { - QStringList searchResult; - try { - qDebug() << "--search start--"; - - Xapian::Database db(INDEX_PATH.toStdString()); - Xapian::Enquire enquire(db); - Xapian::QueryParser qp; - qp.set_default_op(Xapian::Query::OP_PHRASE); - qp.set_database(db); - auto userInput = indexText; - - std::string queryStr = indexText.replace("", " ").toStdString(); -// std::string s =db.get_spelling_suggestion(queryStr,10); -// qDebug()<<"spelling_suggestion!"< v; - for(int i = 0; i < userInput.size(); i++) { - v.push_back(Xapian::Query(QString(userInput.at(i)).toStdString())); - qDebug() << userInput.at(i); - qDebug() << QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description()); - } - Xapian::Query queryNear = Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end()); - Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, queryNear, queryPhrase); - - qDebug() << QString::fromStdString(query.get_description()); - enquire.set_query(query); - - Xapian::MSet result = enquire.get_mset(0, 9999); - qDebug() << "find results count=" << static_cast(result.get_matches_estimated()); - -// QStringList *pathTobeDelete = new QStringList; - - //get search result - for(auto it = result.begin(); it != result.end(); ++it) { - Xapian::Document doc = it.get_document(); - std::string data = doc.get_data(); - Xapian::weight docScoreWeight = it.get_weight(); - Xapian::percent docScorePercent = it.get_percent(); -// QFileInfo *info = new QFileInfo(QString::fromStdString(data)); - QFileInfo info(QString::fromStdString(data)); - - if(!info.exists()) { -// pathTobeDelete->append(QString::fromStdString(data)); - qDebug() << 
QString::fromStdString(data) << "is not exist!!"; - } else { - searchResult.append(QString::fromStdString(data)); - } - - qDebug() << "doc=" << QString::fromStdString(data) << ",weight=" << docScoreWeight << ",percent=" << docScorePercent; - } -// //Delete those path doc which is not already exist. -// if(!pathTobeDelete->isEmpty()) -// deleteAllIndex(pathTobeDelete); - - qDebug() << "--search finish--"; - } catch(const Xapian::Error &e) { - qDebug() << QString::fromStdString(e.get_description()); - } - return searchResult; -} - -bool IndexGenerator::deleteAllIndex(QStringList *pathlist) { - QStringList *list = pathlist; - if(list->isEmpty()) - return true; - try { - qDebug() << "--delete start--"; - for(int i = 0; i < list->size(); i++) { - QString doc = list->at(i); - std::string uniqueterm = FileUtils::makeDocUterm(doc); - std::string upterm = "ZEEKERUPTERM" + FileUtils::makeDocUterm(doc); - - m_database_path->delete_document(uniqueterm); - m_database_content->delete_document(uniqueterm); -// m_database_ocr->delete_document(uniqueterm); - - //delete all files under it if it's a dir. 
- m_database_path->delete_document(upterm); - m_database_content->delete_document(upterm); -// m_database_ocr->delete_document(upterm); - - qDebug() << "delete path" << doc; -// qDebug() << "delete md5" << QString::fromStdString(uniqueterm); - - // qDebug()<<"m_database_path->get_lastdocid()!!!"<get_lastdocid(); - // qDebug()<<"m_database_path->get_doccount()!!!"<get_doccount(); - } - m_database_path->commit(); - m_database_content->commit(); -// m_database_ocr->commit(); - qDebug() << "--delete finish--"; - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - return false; - } - - Q_EMIT this->transactionFinished(); - return true; -} - -bool IndexGenerator::deleteContentIndex(QStringList *pathlist) -{ - if(pathlist->isEmpty()) - return true; - try { - qDebug() << "--delete start--"; - for(int i = 0; i < pathlist->size(); i++) { - QString doc = pathlist->at(i); - std::string uniqueterm = FileUtils::makeDocUterm(doc); - m_database_content->delete_document(uniqueterm); - qDebug() << "delete path" << doc; - } - m_database_content->commit(); - qDebug() << "--delete finish--"; - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - return false; - } - return true; -} - -bool IndexGenerator::deleteOcrIndex(QStringList *pathlist) -{ - if(pathlist->isEmpty()) - return true; - try { - qDebug() << "--delete start--"; - for(int i = 0; i < pathlist->size(); i++) { - QString doc = pathlist->at(i); - std::string uniqueterm = FileUtils::makeDocUterm(doc); - m_database_ocr->delete_document(uniqueterm); - qDebug() << "delete path" << doc; - } - m_database_ocr->commit(); - qDebug() << "--delete finish--"; - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - return false; - } - return true; -} - -bool IndexGenerator::updateIndex(QVector *pendingFiles) -{ - - QQueue> *fileIndexInfo = new QQueue>; - QQueue *fileContentIndexInfo = new QQueue; - //QQueue 
*fileOcrIndexInfo = new QQueue; - QStringList *deleteList = new QStringList; - QStringList *contentDeleteList = new QStringList; - for (PendingFile file : *pendingFiles) { - if (file.shouldRemoveIndex()) { - deleteList->append(file.path()); - continue; - } - fileIndexInfo->append(QVector() << file.path().section("/" , -1) - << file.path() << QString(file.isDir() ? "1" : "0") - << QFileInfo(file.path()).lastModified().toString("yyyyMMddHHmmss")); - if (file.path().split(".").isEmpty()){ - continue; - } - if (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()]) { - if (!FileUtils::isEncrypedOrUnreadable(file.path())) { - fileContentIndexInfo->append(file.path()); - } else { - contentDeleteList->append(file.path()); - } - } else if (true == targetPhotographTypeMap[file.path().section("/" , -1) .split(".").last()]) { - if (FileUtils::isOcrSupportSize(file.path())) { - fileContentIndexInfo->append(file.path()); - } - } - } - if (!deleteList->isEmpty()) { - deleteAllIndex(deleteList); - } - if (!contentDeleteList->isEmpty()) { - deleteContentIndex(contentDeleteList); - } - if (!fileIndexInfo->isEmpty()) { - creatAllIndex(fileIndexInfo); - } - if (!fileContentIndexInfo->isEmpty()) { - creatAllIndex(fileContentIndexInfo); - } - //if (!fileOcrIndexInfo->isEmpty()) { - // creatOcrIndex(fileOcrIndexInfo); - //} - if (fileIndexInfo) { - delete fileIndexInfo; - fileIndexInfo = nullptr; - } - if (fileContentIndexInfo) { - delete fileContentIndexInfo; - fileContentIndexInfo = nullptr; - } - //if (fileOcrIndexInfo) { - // delete fileOcrIndexInfo; - // fileOcrIndexInfo = nullptr; - //} - if (deleteList) { - delete deleteList; - deleteList = nullptr; - } - if (contentDeleteList) { - delete contentDeleteList; - contentDeleteList = nullptr; - } - - return true; -} - diff --git a/libsearch/index/index-generator.h b/libsearch/index/index-generator.h deleted file mode 100644 index 358e6e2..0000000 --- a/libsearch/index/index-generator.h +++ /dev/null @@ -1,105 
+0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Authors: zhangpengfei - * - */ -#ifndef INDEXGENERATOR_H -#define INDEXGENERATOR_H - -#include -#include -//#include -#include -#include -#include -#include -#include -//#include -#include "construct-document.h" -#include "index-status-recorder.h" -#include "document.h" -#include "file-reader.h" -#include "common.h" -#include "pending-file.h" - -namespace UkuiSearch { -//extern QVector *_doc_list_path; -//extern QMutex _mutex_doc_list_path; -//extern QVector *_doc_list_content; -//extern QMutex _mutex_doc_list_content; - -class IndexGenerator : public QObject { - friend class ConstructDocumentForPath; - friend class ConstructDocumentForContent; - friend class ConstructDocumentForOcr; - Q_OBJECT -public: - static IndexGenerator *getInstance(); - ~IndexGenerator(); - - void rebuildIndexDatabase(const QString &path = INDEX_PATH); - void rebuildContentIndexDatabase(const QString &path = CONTENT_INDEX_PATH); - void rebuildOcrIndexDatabase(const QString &path = OCR_INDEX_PATH); -// Q_INVOKABLE void appendDocListPath(Document doc); - //for search test - static QStringList IndexSearch(QString indexText); - void setSynonym(); -Q_SIGNALS: - void transactionFinished(); - void searchFinish(); -public Q_SLOTS: - bool creatAllIndex(QQueue> *messageList); - bool creatAllIndex(QQueue *messageList); - 
bool creatOcrIndex(QQueue *messageList); - bool deleteAllIndex(QStringList *pathlist); - bool deleteContentIndex(QStringList *pathlist); - bool deleteOcrIndex(QStringList *pathlist); - bool updateIndex(QVector *pendingFiles); - -private: - explicit IndexGenerator(QObject *parent = nullptr); - static QMutex m_mutex; - //For file name index - void HandlePathList(QQueue > *messageList); - //For file content index - void HandlePathList(QQueue *messageList); - //For ocr index - void HandleOcrPathList(QQueue *messageList); - static Document GenerateDocument(const QVector &list); - static Document GenerateContentDocument(const QString &list); - //add one data in database - void insertIntoDatabase(Document& doc); - void insertIntoContentDatabase(Document& doc); - - static QVector g_docListForPath; - static QMutex g_mutexDocListForPath; - static QVector g_docListForContent; - static QMutex g_mutexDocListForContent; - static QVector g_docListForOcr; - static QMutex g_mutexDocListForOcr; - QMap m_index_map; - QString m_index_data_path; - Xapian::WritableDatabase* m_database_path = nullptr; - Xapian::WritableDatabase* m_database_content = nullptr; - Xapian::WritableDatabase* m_database_ocr = nullptr; - std::string m_docstr; - std::string m_index_text_str; - Xapian::TermGenerator m_indexer; -}; -} - -#endif // INDEXGENERATOR_H diff --git a/libsearch/index/index-scheduler.cpp b/libsearch/index/index-scheduler.cpp new file mode 100644 index 0000000..a3af901 --- /dev/null +++ b/libsearch/index/index-scheduler.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#include "index-scheduler.h" +#include "index-updater.h" +#include "first-run-indexer.h" +using namespace UkuiSearch; +IndexScheduler::IndexScheduler(QObject *parent) : + QObject(parent), + m_statusRecorder(IndexStatusRecorder::getInstance()), + m_config(FileIndexerConfig::getInstance()), + m_state(Startup), + m_stop(0) +{ + qRegisterMetaType<IndexerState>("IndexerState"); /* the type argument cannot be deduced from the name string */ + m_threadPool.setMaxThreadCount(1); /* index jobs are run one at a time */ + connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex); + connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable); + connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath); + connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex); + m_state = Startup; + if(m_config->isFileIndexEnable()) { + scheduleIndexing(); + } else { + m_stop.fetchAndStoreRelaxed(1); /* file index disabled: start in the stopped state */ + } +} + +void IndexScheduler::addNewPath(const QString &folders, const QStringList &blackList) +{ + if(m_stop.load()) { + qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time."; + return; + } + m_isAddNewPathFinished = false; /* set back to true in addNewPathFinished() */ + m_state = Running; + FirstRunIndexer::Targets target = FirstRunIndexer::Target::None; + if(m_config->isFileIndexEnable()) { + target |= FirstRunIndexer::Target::Basic; + } + if(m_config->isContentIndexEnable()) { + target |= FirstRunIndexer::Target::Content; + } + if(FirstRunIndexer::Target::None != target) { + FirstRunIndexer *indexer = new FirstRunIndexer(QStringList(folders), blackList, m_stop, 
FirstRunIndexer::WorkMode::Add, target); + connect(indexer, &FirstRunIndexer::done, this, &IndexScheduler::addNewPathFinished, Qt::QueuedConnection); + m_threadPool.start(indexer); + } +} + +void IndexScheduler::removeIndex(const QString &folders) +{ + if(m_stop.load()) { + qDebug() << "Index Scheduler is being stopped, remove operation will be executed when started up next time."; + return; + } + m_fileWatcher.removeWatch(folders, true); +} + +void IndexScheduler::stop() +{ + m_stop.fetchAndStoreRelaxed(1); /* raise the stop flag that was handed to the index jobs */ + m_fileWatcher.removeWatch(); + m_threadPool.clear(); /* drop jobs that are queued but not started yet */ + m_state = Stop; + qDebug() << "Index scheduler has been stopped."; + Q_EMIT stateChange(m_state); +} + +void IndexScheduler::scheduleIndexing() +{ + if(!m_isFirstRunFinished) { /* a previous first-run job has not reported Ready yet */ + return; + } + + m_isFirstRunFinished = false; + m_stop.fetchAndStoreRelaxed(0); + m_state = Running; + Q_EMIT stateChange(m_state); + FirstRunIndexer::Targets rebuiltTarget = checkAndRebuild(); + + FirstRunIndexer::WorkModes mode = FirstRunIndexer::WorkMode::Update; + FirstRunIndexer::Targets target = FirstRunIndexer::Target::None; + + //If a database has just been rebuilt, skip the incremental update step for it. + if(m_config->isFileIndexEnable() && !(rebuiltTarget & FirstRunIndexer::Target::Basic)) { + target |= FirstRunIndexer::Target::Basic; + m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Updating); + } + if(m_config->isContentIndexEnable() && !(rebuiltTarget & FirstRunIndexer::Target::Content)) { + target |= FirstRunIndexer::Target::Content; + m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Updating); + } + startIndexJob(mode, target); + + //Start the file watchers. + m_fileWatcher.installWatches(); +} + +IndexScheduler::IndexerState IndexScheduler::getIndexState() +{ + return m_state; +} + +FirstRunIndexer::Targets IndexScheduler::checkAndRebuild() +{ + FirstRunIndexer::WorkModes mode = FirstRunIndexer::WorkMode::Rebuild; + FirstRunIndexer::Targets target = FirstRunIndexer::Target::None; + 
if(m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error && m_config->isFileIndexEnable()) { + qDebug() << "Basic database error,need rebuild"; + target |= FirstRunIndexer::Target::Basic; + m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Initializing); + } + + if(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error && m_config->isFileIndexEnable() && m_config->isContentIndexEnable()) { /* a content rebuild also requires the content-index switch, as in scheduleIndexing() */ + qDebug() << "Content database error,need rebuild"; + target |= FirstRunIndexer::Target::Content; + m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Initializing); /* reset the content key (not the basic one) so contentIndexDone can mark it Ready */ + } + startIndexJob(mode, target); /* no-op when target is None */ + return target; +} + +void IndexScheduler::startIndexJob(FirstRunIndexer::WorkModes &mode, FirstRunIndexer::Targets &target) +{ + if(FirstRunIndexer::Target::None != target) { + FirstRunIndexer *indexer = new FirstRunIndexer(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), m_stop, mode, target); + connect(indexer, &FirstRunIndexer::done, this, &IndexScheduler::firstRunFinished, Qt::QueuedConnection); + connect(indexer, &FirstRunIndexer::progress, this, &IndexScheduler::process); + + connect(indexer, &FirstRunIndexer::basicIndexDone, this, [this](uint size){ /* only members are used, so capture this explicitly */ + bool success = false; + if(!(m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error)) { + m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Ready); + success = true; + } + Q_EMIT basicIndexDone(size, success); + }); + + connect(indexer, &FirstRunIndexer::contentIndexDone, this, [this](uint size){ + bool success = false; + if(!(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error)) { + m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Ready); + success = true; + } + Q_EMIT contentIndexDone(size, success); + }); + + m_threadPool.start(indexer); + } +} + 
+void IndexScheduler::fileIndexEnable(bool enable) +{ + if(enable) { + scheduleIndexing(); + } else { + stop(); + } +} + +void IndexScheduler::updateIndex(const QVector<PendingFile> &files) +{ + qDebug() << "updateIndex====="; + m_isUpdateFinished = false; /* set back to true in updateFinished() */ + m_state = Running; + IndexUpdater *updateJob = new IndexUpdater(files, m_stop); + connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection); + m_threadPool.start(updateJob); +} + +void IndexScheduler::firstRunFinished() +{ + if((m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Ready) + && m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Ready) { /* the first run counts as finished only once both databases are Ready */ + m_isFirstRunFinished = true; + } + if(m_isFirstRunFinished && m_isAddNewPathFinished && m_isUpdateFinished) { /* Idle is reported only when no kind of job is pending */ + m_state = Idle; + Q_EMIT stateChange(m_state); + } +} + +void IndexScheduler::updateFinished() +{ + m_isUpdateFinished = true; + if(m_isFirstRunFinished && m_isAddNewPathFinished) { + m_state = Idle; + Q_EMIT stateChange(m_state); + } +} + +void IndexScheduler::addNewPathFinished() +{ + m_isAddNewPathFinished = true; + if(m_isFirstRunFinished && m_isUpdateFinished) { + m_state = Idle; + Q_EMIT stateChange(m_state); + } +} + diff --git a/libsearch/index/index-scheduler.h b/libsearch/index/index-scheduler.h new file mode 100644 index 0000000..d89955b --- /dev/null +++ b/libsearch/index/index-scheduler.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#ifndef INDEXSCHEDULER_H +#define INDEXSCHEDULER_H + +#include +#include +#include +#include "file-watcher.h" +#include "index-status-recorder.h" +#include "common.h" +#include "first-run-indexer.h" +namespace UkuiSearch { + +class IndexScheduler : public QObject +{ + Q_OBJECT + +public: + enum IndexerState { + Startup, + Running, + Idle, + Stop + }; + Q_ENUM(IndexerState) + + explicit IndexScheduler(QObject *parent = nullptr); + /** + * @brief addNewPath + * @param folders 要添加索引的目录 + * @param blackList 要添加索引的目录下的黑名单 + */ + Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList()); + /** + * @brief removeIndex + * @param folders 要移除索引的目录 + */ + Q_SCRIPTABLE void removeIndex(const QString& folders); + Q_SCRIPTABLE void stop(); + Q_SCRIPTABLE void scheduleIndexing(); + Q_SCRIPTABLE IndexerState getIndexState(); + +Q_SIGNALS: + void stateChange(IndexerState); + void process(IndexType type, uint all, uint finished); + void basicIndexDone(uint size, bool success); + void contentIndexDone(uint size, bool success); + void done(); + +private Q_SLOTS: + void fileIndexEnable(bool enable); + void updateIndex(const QVector& files); + void firstRunFinished(); + void updateFinished(); + void addNewPathFinished(); + +private: + /** + * @brief checkAndRebuild + * 检查数据库状态,数据库状态处于 IndexStatusRecorder::State::Error 时,开始重建任务。 + * @return 返回需要重建的数据库 + */ + FirstRunIndexer::Targets checkAndRebuild(); + void startIndexJob(FirstRunIndexer::WorkModes &mode, FirstRunIndexer::Targets &target); + FileWatcher m_fileWatcher; + IndexStatusRecorder *m_statusRecorder = nullptr; + FileIndexerConfig *m_config = nullptr; + IndexerState m_state; + QAtomicInt m_stop; + QThreadPool m_threadPool; + + bool m_isFirstRunFinished = true; + bool m_isUpdateFinished = 
true; + bool m_isAddNewPathFinished = true; +}; +} +#endif // INDEXSCHEDULER_H diff --git a/libsearch/index/index-status-recorder.cpp b/libsearch/index/index-status-recorder.cpp index f09dcda..1b9ca55 100644 --- a/libsearch/index/index-status-recorder.cpp +++ b/libsearch/index/index-status-recorder.cpp @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ #include "index-status-recorder.h" #include @@ -12,10 +31,15 @@ IndexStatusRecorder *IndexStatusRecorder::getInstance() return m_instance; } -void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value) +IndexStatusRecorder::IndexStatusRecorder(QObject *parent) : QObject(parent) +{ + m_status = new QSettings(INDEX_STATUS, QSettings::IniFormat, this); +} + +void IndexStatusRecorder::setStatus(const QString &key, State state) { m_mutex.lock(); - m_status->setValue(key, value); + m_status->setValue(key, state); m_status->sync(); m_mutex.unlock(); } @@ -31,7 +55,11 @@ bool IndexStatusRecorder::indexDatabaseEnable() m_mutex.lock(); m_status->sync(); m_mutex.unlock(); - return m_status->value(INDEX_DATABASE_STATE, QVariant(false)).toBool(); + if(m_status->value(INDEX_DATABASE_STATE, 0).toInt() == State::Ready) { + return true; + } else { + return false; + } } @@ -40,20 +68,9 @@ bool IndexStatusRecorder::contentIndexDatabaseEnable() 
m_mutex.lock(); m_status->sync(); m_mutex.unlock(); - return m_status->value(CONTENT_INDEX_DATABASE_STATE, QVariant(false)).toBool(); - -} - -bool IndexStatusRecorder::ocrDatabaseEnable() -{ - m_mutex.lock(); - m_status->sync(); - m_mutex.unlock(); - return m_status->value(OCR_DATABASE_STATE, QVariant(false)).toBool(); - -} - -IndexStatusRecorder::IndexStatusRecorder(QObject *parent) : QObject(parent) -{ - m_status = new QSettings(INDEX_STATUS, QSettings::IniFormat, this); + if(m_status->value(CONTENT_INDEX_DATABASE_STATE, 0).toInt() == State::Ready) { + return true; + } else { + return false; + } } diff --git a/libsearch/index/index-status-recorder.h b/libsearch/index/index-status-recorder.h index 7a61a1b..bf72776 100644 --- a/libsearch/index/index-status-recorder.h +++ b/libsearch/index/index-status-recorder.h @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ #ifndef INDEXSTATUSRECORDER_H #define INDEXSTATUSRECORDER_H @@ -7,9 +26,6 @@ #include #define CONTENT_INDEX_DATABASE_STATE "content_index_database_state" #define INDEX_DATABASE_STATE "index_database_state" -#define OCR_DATABASE_STATE "ocr_database_state" -#define INOTIFY_NORMAL_EXIT "inotify_normal_exit" // 1 - 出错;2 - 正常;3-关闭索引; 0-有信号正在处理 -#define PENDING_FILE_QUEUE_FINISH "pending_file_queue_finish" #define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf" namespace UkuiSearch { //fixme: we need a better way to record index status. @@ -17,17 +33,24 @@ class IndexStatusRecorder : public QObject { Q_OBJECT public: + enum State{ + Initializing = 0, + Error = 1, + Ready = 2, + Updating = 3 + }; + Q_ENUM(State) + static IndexStatusRecorder *getInstance(); - void setStatus(const QString& key, const QVariant& value); + void setStatus(const QString& key, UkuiSearch::IndexStatusRecorder::State state); const QVariant getStatus(const QString& key); bool indexDatabaseEnable(); bool contentIndexDatabaseEnable(); - bool ocrDatabaseEnable(); private: explicit IndexStatusRecorder(QObject *parent = nullptr); static IndexStatusRecorder *m_instance; - QSettings *m_status; + QSettings *m_status = nullptr; QMutex m_mutex; }; } diff --git a/libsearch/index/index-updater.cpp b/libsearch/index/index-updater.cpp new file mode 100644 index 0000000..eff61d2 --- /dev/null +++ b/libsearch/index/index-updater.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#include "index-updater.h" +#include +#include "writable-database.h" +#include "basic-indexer.h" +#include "file-indexer-config.h" +#include "file-content-indexer.h" +#include "common.h" +#include "file-utils.h" +using namespace UkuiSearch; +IndexUpdater::IndexUpdater(const QVector& files, QAtomicInt &stop) + : m_cache(files), + m_stop(&stop) +{ +} + +void IndexUpdater::UpdateIndex() +{ + if(FileIndexerConfig::getInstance()->isFileIndexEnable()) { + WritableDatabase basicDb(DataBaseType::Basic); + if(!basicDb.open()) { + qWarning() << "Basic db open failed, fail to update index"; + return; + } + qDebug() << "===update basic index==="; + for(PendingFile file : m_cache) { + if(file.shouldRemoveIndex()) { + qDebug() << "| remove:" <isContentIndexEnable()) { + if(m_stop->load()) { + qDebug() << "Index stopped, abort update content index."; + return; + } + WritableDatabase contentDb(DataBaseType::Content); + if(!contentDb.open()) { + qWarning() << "Content db open failed, fail to update index"; + return; + } + + QMap suffixMap = targetFileTypeMap; + //ocr + if(FileIndexerConfig::getInstance()->isOCREnable()) { + suffixMap.unite(targetPhotographTypeMap); + } + qDebug() << "===update content index==="; + int size = 0; + for(PendingFile file : m_cache) { + QString suffix = QFileInfo(file.path()).suffix(); + if(file.shouldRemoveIndex()) { + qDebug() << "| remove:" <= 30) { + contentDb.commit(); + qDebug() << "30 finished."; + size = 0; + } + if(m_stop->load()) { + qDebug() << "Index stopped, content index update interrupted"; + m_cache.clear(); + m_cache.shrink_to_fit(); + 
malloc_trim(0); + return; + } + } + contentDb.commit(); + qDebug() << "===finish update content index==="; + } + m_cache.clear(); + m_cache.shrink_to_fit(); + malloc_trim(0); + Q_EMIT done(); +} + +void IndexUpdater::run() +{ + UpdateIndex(); +} diff --git a/libsearch/index/traverse-bfs.h b/libsearch/index/index-updater.h similarity index 52% rename from libsearch/index/traverse-bfs.h rename to libsearch/index/index-updater.h index 290ff7b..1c14819 100644 --- a/libsearch/index/traverse-bfs.h +++ b/libsearch/index/index-updater.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020, KylinSoft Co., Ltd. + * Copyright (C) 2022, KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,33 +14,34 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . * - * Authors: zhangzihao + * Authors: iaom * */ -#ifndef TRAVERSE_BFS_H -#define TRAVERSE_BFS_H +#ifndef INDEXUPDATER_H +#define INDEXUPDATER_H - -#include -#include -#include +#include +#include +#include +#include +#include "pending-file.h" namespace UkuiSearch { -class TraverseBFS { + +class IndexUpdater : public QObject, public QRunnable +{ + Q_OBJECT public: - TraverseBFS() = default; - void Traverse(); - virtual ~TraverseBFS() = default; - virtual void work(const QFileInfo&) = 0; - void setPath(const QStringList&); - void setBlockPath(const QStringList &pathList); -protected: - TraverseBFS(const QStringList&); - QStringList m_pathList; - QStringList m_blockList; + explicit IndexUpdater(const QVector& files, QAtomicInt& stop); + void run() override; + +Q_SIGNALS: + void done(); + private: - TraverseBFS(const TraverseBFS&) = delete; - void operator=(const TraverseBFS&) = delete; + void UpdateIndex(); + + QVector m_cache; + QAtomicInt *m_stop = nullptr; }; } - -#endif // TRAVERSE_BFS_H +#endif // INDEXUPDATER_H diff --git a/libsearch/index/index.pri 
b/libsearch/index/index.pri index b322c79..cdd3b31 100644 --- a/libsearch/index/index.pri +++ b/libsearch/index/index.pri @@ -1,36 +1,45 @@ INCLUDEPATH += $$PWD HEADERS += \ - $$PWD/construct-document.h \ + $$PWD/basic-indexer.h \ + $$PWD/database.h \ $$PWD/document.h \ +# $$PWD/file-iterator.h \ + $$PWD/file-content-indexer.h \ $$PWD/file-reader.h \ $$PWD/file-search-plugin.h \ - $$PWD/first-index.h \ - $$PWD/index-generator.h \ + $$PWD/first-run-indexer.h \ + $$PWD/index-scheduler.h \ $$PWD/index-status-recorder.h \ - $$PWD/inotify-watch.h \ + $$PWD/monitor.h \ $$PWD/ocrobject.h \ $$PWD/pending-file-queue.h \ $$PWD/pending-file.h \ $$PWD/search-manager.h \ - $$PWD/file-index-manager.h \ - $$PWD/traverse-bfs.h \ - $$PWD/ukui-search-qdbus.h + $$PWD/ukui-search-qdbus.h \ + $$PWD/file-indexer-config.h \ + $$PWD/file-watcher.h \ + $$PWD/index-updater.h \ + $$PWD/writable-database.h SOURCES += \ - $$PWD/construct-document.cpp \ + $$PWD/basic-indexer.cpp \ + $$PWD/database.cpp \ $$PWD/document.cpp \ - $$PWD/file-index-manager.cpp \ + $$PWD/file-content-indexer.cpp \ $$PWD/file-reader.cpp \ $$PWD/file-search-plugin.cpp \ - $$PWD/first-index.cpp \ - $$PWD/index-generator.cpp \ + $$PWD/first-run-indexer.cpp \ + $$PWD/index-scheduler.cpp \ $$PWD/index-status-recorder.cpp \ - $$PWD/inotify-watch.cpp \ + $$PWD/monitor.cpp \ $$PWD/ocrobject.cpp \ $$PWD/pending-file-queue.cpp \ $$PWD/pending-file.cpp \ $$PWD/search-manager.cpp \ - $$PWD/traverse-bfs.cpp \ - $$PWD/ukui-search-qdbus.cpp + $$PWD/ukui-search-qdbus.cpp \ + $$PWD/file-indexer-config.cpp \ + $$PWD/file-watcher.cpp \ + $$PWD/index-updater.cpp \ + $$PWD/writable-database.cpp diff --git a/libsearch/index/inotify-watch.cpp b/libsearch/index/inotify-watch.cpp deleted file mode 100644 index 7a02f15..0000000 --- a/libsearch/index/inotify-watch.cpp +++ /dev/null @@ -1,512 +0,0 @@ -#include "inotify-watch.h" -#include -#include -#include -#include -#include "dir-watcher.h" -using namespace UkuiSearch; -static InotifyWatch* 
global_instance_InotifyWatch = nullptr; - -UkuiSearch::InotifyWatch *UkuiSearch::InotifyWatch::getInstance() -{ - if(!global_instance_InotifyWatch) { - global_instance_InotifyWatch = new InotifyWatch(); - } - return global_instance_InotifyWatch; -} - -UkuiSearch::InotifyWatch::InotifyWatch(): TraverseBFS(), m_semaphore(INDEX_SEM, 0, QSystemSemaphore::AccessMode::Open) -{ - qDebug() << "setInotifyMaxUserWatches start"; - UkuiSearchQDBus usQDBus; - usQDBus.setInotifyMaxUserWatches(); - qDebug() << "setInotifyMaxUserWatches end"; - m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this); -} - -InotifyWatch::~InotifyWatch() -{ - if(m_notifier) - delete m_notifier; - m_notifier = nullptr; -} - -bool InotifyWatch::addWatch(const QString &path) -{ - int ret = inotify_add_watch(m_inotifyFd, path.toStdString().c_str(), (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY)); - if(ret == -1) { - qWarning() << "AddWatch error:" << path; - return false; - } - m_pathMap[ret] = path; -// qDebug() << "Watch: " << path << "ret: " << ret; - return true; -} - -bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase) -{ - inotify_rm_watch(m_inotifyFd, m_pathMap.key(path)); - - if(removeFromDatabase) { - for(QMap::Iterator i = m_pathMap.begin(); i != m_pathMap.end();) { - // qDebug() << i.value(); - // if(i.value().length() > path.length()) { - if(FileUtils::isOrUnder(i.value(), path)) { - qDebug() << "remove path: " << i.value(); - inotify_rm_watch(m_inotifyFd, m_pathMap.key(path)); - PendingFile f(i.value()); - f.setDeleted(); - f.setIsDir(); - PendingFileQueue::getInstance()->enqueue(f); - m_pathMap.erase(i++); - } else { - i++; - } - } - } else { - for(QMap::Iterator i = m_pathMap.begin(); i != m_pathMap.end();) { - // qDebug() << i.value(); - if(i.value().length() > path.length()) { - if(FileUtils::isOrUnder(i.value(), path)) { -// if(i.value().startsWith(path + "/")) { -// qDebug() << "remove path: " << i.value(); - 
inotify_rm_watch(m_inotifyFd, m_pathMap.key(path)); - m_pathMap.erase(i++); - } else { - i++; - } - } else { - i++; - } - } - } - m_pathMap.remove(m_pathMap.key(path)); - return true; -} - -void InotifyWatch::work(const QFileInfo &info) -{ - qDebug() << info.fileName() << "-------" << info.absoluteFilePath(); - if(info.isDir() && (!info.isSymLink())) { - this->addWatch(info.absoluteFilePath()); - } - PendingFile f(info.absoluteFilePath()); - if(info.isDir()) { - f.setIsDir(); - } - PendingFileQueue::getInstance()->enqueue(f); -} - -void InotifyWatch::firstTraverse(QStringList pathList, QStringList blockList) -{ - QMutexLocker locker(&m_pathMapLock); - if(pathList.isEmpty()) { - pathList = m_pathList; - } - if(blockList.isEmpty()) { - blockList = m_blockList; - } - - QQueue bfs; - for(QString blockPath : blockList) { - for(QString path : pathList) { - if(FileUtils::isOrUnder(path, blockPath)) { - pathList.removeOne(path); - } - } - } - for(QString path : pathList) { - addWatch(path); - bfs.enqueue(path); - } - - QFileInfoList list; - QDir dir; - QStringList tmpList = blockList; - dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); - dir.setSorting(QDir::DirsFirst); - while(!bfs.empty()) { - dir.setPath(bfs.dequeue()); - list = dir.entryInfoList(); - for(auto i : list) { - bool isBlocked = false; - for(QString path : tmpList) { - if(i.absoluteFilePath() == path) { - isBlocked = true; - tmpList.removeOne(path); - break; - } - } - if(isBlocked) - continue; - - if(i.isDir() && (!(i.isSymLink()))) { - addWatch(i.absoluteFilePath()); - bfs.enqueue(i.absoluteFilePath()); - } - } - } -} - -void InotifyWatch::addIndexPath(const QString path, const QStringList blockList) -{ - this->firstTraverse(QStringList() << path, blockList); -} - -void InotifyWatch::removeIndexPath(const QString &path, bool fileIndexEnable) -{ - QMutexLocker locker(&m_pathMapLock); - if(fileIndexEnable) { - removeWatch(path, true); - }else { - for(QMap::Iterator i = m_pathMap.begin(); i != 
m_pathMap.end();) { - if(FileUtils::isOrUnder(i.value(), path)) { - qDebug() << "remove path: " << i.value(); - PendingFile f(i.value()); - f.setDeleted(); - f.setIsDir(); - PendingFileQueue::getInstance()->enqueue(f); - m_pathMap.erase(i++); - } else { - i++; - } - } - } - PendingFileQueue::getInstance()->forceFinish(); - PendingFileQueue::getInstance()->~PendingFileQueue(); -} - -void InotifyWatch::stopWatch() -{ -// if(this->isRunning()) { -// this->quit(); -// if(m_notifier) -// delete m_notifier; -// m_notifier = nullptr; -// removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false); -// } - -// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); -} - -void InotifyWatch::run() -{ - m_inotifyFd = inotify_init(); - if (m_inotifyFd > 0) { - qDebug()<<"Inotify init success!"; - } else { - qWarning() << "Inotify init fail! Now try add inotify_user_instances."; - UkuiSearchQDBus usQDBus; - usQDBus.addInotifyUserInstances(128); - m_inotifyFd = inotify_init(); - if (m_inotifyFd > 0) { - qDebug()<<"Inotify init success!"; - } else { - printf("errno=%d\n",errno); - printf("Mesg:%s\n",strerror(errno)); - Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); - } - } - - setPath(DirWatcher::getDirWatcher()->currentIndexableDir()); - setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex()); - firstTraverse(); - - while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { - fd_set fds; - FD_ZERO(&fds); - FD_SET(m_inotifyFd, &fds); - int rc; - rc = select(m_inotifyFd + 1, &fds, NULL, NULL, NULL); - if(rc > 0) { - ++FileUtils::indexStatus; - int avail; - if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) { - qWarning() << "Did not receive an entire inotify event."; - --FileUtils::indexStatus; - return; - } - - char* buf = (char*)malloc(avail); - memset(buf, 0x00, avail); - - const ssize_t len = read(m_inotifyFd, buf, avail); - if(len != avail) { - qWarning()<<"read event error"; - // 
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - } - - int i = 0; - while (i < len) { - const struct inotify_event* event = (struct inotify_event*)&buf[i]; - if(event->name[0] != '.') { - // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; - // qDebug("mask:0x%x,",event->mask); - break; - } - i += sizeof(struct inotify_event) + event->len; - } - if(i < len ) { - qDebug() << "fork"; - slotEvent(buf, len); - } - free(buf); - --FileUtils::indexStatus; - } else if(rc < 0) { - // error - qWarning() << "select result < 0, error!"; - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - assert(false); - } - } - qDebug() << "Leave watch loop"; - if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); - for(QString path : m_pathMap) { - inotify_rm_watch(m_inotifyFd, m_pathMap.key(path)); - } - m_pathMap.clear(); - } - close(m_inotifyFd); -// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC); -// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read); -// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection); -// exec(); -} - -void InotifyWatch::slotEvent(char *buf, ssize_t len) -{ -// eventProcess(socket); - if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { - pid_t pid; - pid = fork(); - if(pid == 0) { - prctl(PR_SET_PDEATHSIG, SIGTERM); - prctl(PR_SET_NAME, "inotify-index"); - this->eventProcess(buf, len); - fd_set read_fds; - int rc; - timeval* read_timeout = (timeval*)malloc(sizeof(timeval)); - read_timeout->tv_sec = 40; - read_timeout->tv_usec = 0; - for(;;) { - FD_ZERO(&read_fds); - FD_SET(m_inotifyFd, &read_fds); - rc = select(m_inotifyFd + 1, &read_fds, NULL, NULL, read_timeout); - if(rc < 0) { - // error - qWarning() << "fork select result < 0, error!"; - 
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - assert(false); - } else if(rc == 0) { - qDebug() << "select timeout!"; - ::free(read_timeout); - - QBuffer buffer; - QDataStream out(&buffer); - if (m_sharedMemory->isAttached()) { - m_sharedMemory->detach(); - } - buffer.open(QBuffer::ReadWrite); - out << m_pathMap; - int size = buffer.size(); - if (!m_sharedMemory->create(size)) { - qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString(); - } else { - m_sharedMemory->lock(); - char *to = static_cast(m_sharedMemory->data()); - const char *from = buffer.data().constData(); - memcpy(to, from, qMin(size, m_sharedMemory->size())); - m_sharedMemory->unlock(); - } - // GlobalSettings::getInstance()->forceSync(); - PendingFileQueue::getInstance()->forceFinish(); - PendingFileQueue::getInstance()->~PendingFileQueue(); - ::_exit(0); - } else { -// qDebug() << "Select remain:" <tv_sec; - this->eventProcess(m_inotifyFd); -// qDebug() << "Select remain:" <tv_sec; - } - } - } else if(pid > 0) { - waitpid(pid, NULL, 0); - if (!m_sharedMemory->attach()) { - qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString(); - } else { - QBuffer buffer; - QDataStream in(&buffer); - QMap pathMap; - m_sharedMemory->lock(); - buffer.setData(static_cast(m_sharedMemory->constData()), m_sharedMemory->size()); - buffer.open(QBuffer::ReadWrite); - in >> pathMap; - m_sharedMemory->unlock(); - m_sharedMemory->detach(); - m_pathMapLock.lock(); - m_pathMap = pathMap; - m_pathMapLock.unlock(); - } - } else { - assert(false); - } - } -} - -char * InotifyWatch::filter() -{ - int avail; - if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) { - qWarning() << "Did not receive an entire inotify event."; - return NULL; - } - - char* buffer = (char*)malloc(avail); - memset(buffer, 0x00, avail); - - const int len = read(m_inotifyFd, buffer, avail); - if(len != avail) { - qWarning()<<"read event error"; -// 
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - } - - int i = 0; - while (i < len) { - const struct inotify_event* event = (struct inotify_event*)&buffer[i]; - if(event->name[0] == '.') { - // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; - // qDebug("mask:0x%x,",event->mask); - i += sizeof(struct inotify_event) + event->len; - return buffer; - } - } - free(buffer); - return NULL; -} -void InotifyWatch::eventProcess(int socket) -{ -// qDebug()<< "Enter eventProcess!"; - int avail; - if (ioctl(socket, FIONREAD, &avail) == EINVAL) { - qWarning() << "Did not receive an entire inotify event."; - return; - } - - char* buffer = (char*)malloc(avail); - memset(buffer, 0x00, avail); - - const ssize_t len = read(socket, buffer, avail); - if(len != avail) { - qWarning()<<"read event error"; -// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); - } - int i = 0; - while (i < len) { - const struct inotify_event* event = (struct inotify_event*)&buffer[i]; - if(event->name[0] != '.') { -// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; -// qDebug("mask:0x%x,",event->mask); - break; - } - i += sizeof(struct inotify_event) + event->len; - } - if(i >= len) { - qDebug() << "There is nothing to do!"; - free(buffer); - return; - } - eventProcess(buffer, len); - free(buffer); -} - -void InotifyWatch::eventProcess(const char *buffer, ssize_t len) -{ -// qDebug()<< "Begin eventProcess! 
len:" << len; - - char * p = const_cast(buffer); - while (p < buffer + len) { - const struct inotify_event* event = reinterpret_cast(p); -// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; -// qDebug("mask:0x%x,",event->mask); - if(event->name[0] != '.') { - QString path = m_pathMap[event->wd] + '/' + event->name; - - //过滤黑名单下的信号 - for(QString i : m_blockList) { - if(FileUtils::isOrUnder(path, i)) - goto next; - } - - //Create top dir first, traverse it last. - if(event->mask & IN_CREATE) { -// qDebug() << "IN_CREATE"; - PendingFile f(path); - if(event->mask & IN_ISDIR) { - f.setIsDir(); - } - PendingFileQueue::getInstance()->enqueue(f); - - if(event->mask & IN_ISDIR) { - if(!QFileInfo(path).isSymLink()){ - addWatch(path); - setPath(QStringList() << path); - Traverse(); - } - } - goto next; - - } - - if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) { - qDebug() << "IN_DELETE or IN_MOVED_FROM"; - if(event->mask & IN_ISDIR) { - removeWatch(path); - } else { - PendingFile f(path); - f.setDeleted(); - PendingFileQueue::getInstance()->enqueue(f); - } - p += sizeof(struct inotify_event) + event->len; - continue; - } - if(event->mask & IN_MODIFY) { -// qDebug() << "IN_MODIFY"; - if(!(event->mask & IN_ISDIR)) { - PendingFileQueue::getInstance()->enqueue(PendingFile(path)); - } - goto next; - - } - - if(event->mask & IN_MOVED_TO) { - qDebug() << "IN_MOVED_TO"; - if(event->mask & IN_ISDIR) { - removeWatch(path); - - PendingFile f(path); - f.setIsDir(); - PendingFileQueue::getInstance()->enqueue(f); - - if(!QFileInfo(path).isSymLink()){ - addWatch(path); - setPath(QStringList() << path); - Traverse(); - } - } else { - //Enqueue a deleted file to merge. - PendingFile f(path); - f.setDeleted(); - PendingFileQueue::getInstance()->enqueue(f); - //Enqueue a new one. 
- PendingFileQueue::getInstance()->enqueue(PendingFile(path)); - } - goto next; - } - } -next: - p += sizeof(struct inotify_event) + event->len; - } -// qDebug()<< "Finish eventProcess!"; -} - - diff --git a/libsearch/index/inotify-watch.h b/libsearch/index/inotify-watch.h deleted file mode 100644 index 7457262..0000000 --- a/libsearch/index/inotify-watch.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef INOTIFYWATCH_H -#define INOTIFYWATCH_H - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "traverse-bfs.h" -#include "ukui-search-qdbus.h" -#include "index-status-recorder.h" -#include "file-utils.h" -#include "pending-file-queue.h" -#include "common.h" -namespace UkuiSearch { -class InotifyWatch : public QThread, public TraverseBFS -{ - Q_OBJECT -public: - static InotifyWatch* getInstance(); - bool addWatch(const QString &path); - bool removeWatch(const QString &path, bool removeFromDatabase = true); - virtual void work(const QFileInfo &info) final; - - void firstTraverse(QStringList pathList = {}, QStringList blockList = {}); - void stopWatch(); - void addIndexPath(const QString path, const QStringList blockList); - void removeIndexPath(const QString &path, bool fileIndexEnable); -protected: - void run() override; - -private Q_SLOTS: - void slotEvent(char *buf, ssize_t len); -private: - explicit InotifyWatch(); - ~InotifyWatch(); - char * filter(); - void eventProcess(int socket); - void eventProcess(const char *buffer, ssize_t len); - - int m_inotifyFd; - QSocketNotifier* m_notifier = nullptr; - QSharedMemory *m_sharedMemory = nullptr; - QMap m_pathMap; - QMutex m_pathMapLock; - QSystemSemaphore m_semaphore; - -}; -} -#endif // INOTIFYWATCH_H diff --git a/libsearch/index/monitor.cpp b/libsearch/index/monitor.cpp new file mode 100644 index 0000000..e9d7f98 --- /dev/null +++ b/libsearch/index/monitor.cpp @@ -0,0 +1,97 @@ +#include "monitor.h" +#include "file-indexer-config.h" +using namespace UkuiSearch; 
+Monitor::Monitor(IndexScheduler *scheduler, QObject *parent) + : QObject(parent), + m_scheduler(scheduler), + m_basicDatabase(DataBaseType::Basic), + m_contentDatabase(DataBaseType::Content) +{ + connect(scheduler, &IndexScheduler::stateChange, this, &Monitor::indexStateChanged); /* forward the scheduler state as this object's own signal */ + connect(scheduler, &IndexScheduler::stateChange, this, &Monitor::onIndexStateChanged); /* re-publish document counts on state changes */ + connect(scheduler, &IndexScheduler::process, this, &Monitor::processUpdate); /* track per-index size and progress */ +} + +QStringList Monitor::getCurrentIndexPaths() +{ + return FileIndexerConfig::getInstance()->currentIndexableDir(); +} + +IndexScheduler::IndexerState Monitor::getIndexState() +{ + return m_scheduler->getIndexState(); +} + +uint Monitor::getBasicIndexSize() +{ + return m_basicIndexSize; +} + +uint Monitor::getContentIndexSize() +{ + return m_contentIndexSize; +} + +uint Monitor::getOCRIndexSize() +{ + return m_ocrIndexSize; +} + +uint Monitor::getBasicIndexProgress() +{ + return m_basicIndexProgress; +} + +uint Monitor::getContentIndexProgress() +{ + return m_contentIndexProgress; +} + +uint Monitor::getOCRIndexProgress() +{ + return m_ocrIndexProgress; +} + +uint Monitor::getBasicIndexDocNum() +{ + return m_basicDatabase.getIndexDocCount(); +} + +uint Monitor::getContentIndexDocNum() +{ + return m_contentDatabase.getIndexDocCount(); +} + +void Monitor::onIndexStateChanged(IndexScheduler::IndexerState state) +{ + if(state == IndexScheduler::IndexerState::Idle) { /* document counts are only re-emitted once the scheduler reports Idle */ + Q_EMIT basicIndexDocNumUpdate(m_basicDatabase.getIndexDocCount()); + Q_EMIT contentIndexDocNumUpdate(m_contentDatabase.getIndexDocCount()); + } +} + +void Monitor::processUpdate(IndexType type, uint all, uint finished) /* stores and re-emits the size (all) and progress (finished) of the given index type */ +{ + switch (type) { + case IndexType::Basic: + m_basicIndexSize = all; + Q_EMIT basicIndexSizeChange(m_basicIndexSize); + m_basicIndexProgress = finished; + Q_EMIT basicIndexProgressUpdate(m_basicIndexProgress); + break; + case IndexType::Contents: + m_contentIndexSize = all; + Q_EMIT contentIndexSizeChange(m_contentIndexSize); +
m_contentIndexProgress = finished; + Q_EMIT contentIndexProgressUpdate(m_contentIndexProgress); + break; + case IndexType::OCR: + m_ocrIndexSize = all; + Q_EMIT ocrIndexSizeChange(m_ocrIndexSize); + m_ocrIndexProgress = finished; /* OCR progress has its own member; writing m_contentIndexProgress here clobbered the content progress */ + Q_EMIT ocrIndexProgressUpdate(m_ocrIndexProgress); + break; + default: + break; + } +} diff --git a/libsearch/index/monitor.h b/libsearch/index/monitor.h new file mode 100644 index 0000000..ce6a861 --- /dev/null +++ b/libsearch/index/monitor.h @@ -0,0 +1,68 @@ +#ifndef MONITOR_H +#define MONITOR_H + +#include +#include "index-scheduler.h" +#include "database.h" +namespace UkuiSearch { +/** + * @brief The Monitor class + * Monitors the index state + * for QML + */ +class Monitor : public QObject +{ + Q_OBJECT + Q_PROPERTY(QStringList currentIndexPaths READ getCurrentIndexPaths) + Q_PROPERTY(IndexScheduler::IndexerState indexState READ getIndexState NOTIFY indexStateChanged) + Q_PROPERTY(uint basicIndexSize READ getBasicIndexSize NOTIFY basicIndexSizeChange) + Q_PROPERTY(uint contentIndexSize READ getContentIndexSize NOTIFY contentIndexSizeChange) + Q_PROPERTY(uint ocrIndexSize READ getOCRIndexSize NOTIFY ocrIndexSizeChange) + Q_PROPERTY(uint basicIndexProgress READ getBasicIndexProgress NOTIFY basicIndexProgressUpdate) + Q_PROPERTY(uint contentIndexProgress READ getContentIndexProgress NOTIFY contentIndexProgressUpdate) + Q_PROPERTY(uint ocrIndexProgress READ getOCRIndexProgress NOTIFY ocrIndexProgressUpdate) + Q_PROPERTY(uint basicIndexDocNum READ getBasicIndexDocNum NOTIFY basicIndexDocNumUpdate) + Q_PROPERTY(uint contentIndexDocNum READ getContentIndexDocNum NOTIFY contentIndexDocNumUpdate) + +public: + explicit Monitor(IndexScheduler* scheduler, QObject *parent = nullptr); + QStringList getCurrentIndexPaths(); + IndexScheduler::IndexerState getIndexState(); + uint getBasicIndexSize(); + uint getContentIndexSize(); + uint getOCRIndexSize(); + uint getBasicIndexProgress(); + uint getContentIndexProgress(); + uint getOCRIndexProgress(); + uint
getBasicIndexDocNum(); + uint getContentIndexDocNum(); + +Q_SIGNALS: + void indexStateChanged(IndexScheduler::IndexerState); + void basicIndexSizeChange(uint); + void contentIndexSizeChange(uint); + void ocrIndexSizeChange(uint); + + void basicIndexProgressUpdate(uint); + void contentIndexProgressUpdate(uint); + void ocrIndexProgressUpdate(uint); + void basicIndexDocNumUpdate(uint); + void contentIndexDocNumUpdate(uint); + +private Q_SLOTS: + void onIndexStateChanged(IndexScheduler::IndexerState); + void processUpdate(IndexType type, uint all, uint finished); + +private: + IndexScheduler *m_scheduler = nullptr; + Database m_basicDatabase; + Database m_contentDatabase; + uint m_basicIndexSize = 0; + uint m_contentIndexSize = 0; + uint m_ocrIndexSize = 0; + uint m_basicIndexProgress = 0; + uint m_contentIndexProgress = 0; + uint m_ocrIndexProgress = 0; +}; +} +#endif // MONITOR_H diff --git a/libsearch/index/ocrobject.cpp b/libsearch/index/ocrobject.cpp index 6d3d717..b2766e3 100644 --- a/libsearch/index/ocrobject.cpp +++ b/libsearch/index/ocrobject.cpp @@ -11,7 +11,7 @@ OcrObject *OcrObject::getInstance() return m_instance; } -void OcrObject::getTxtContent(QString &path, QString &textcontent) +void OcrObject::getTxtContent(const QString &path, QString &textcontent) { // m_api = new tesseract::TessBaseAPI(); // if (m_api->Init(NULL, "chi_sim")) { @@ -53,7 +53,7 @@ void OcrObject::getTxtContent(QString &path, QString &textcontent) Pix *image = pixRead(path.toStdString().data()); if (!image) { - qDebug() << "path:" << path <<" pixRead error!"; +// qDebug() << "path:" << path <<" pixRead error!"; if (api) { api->End(); delete api; @@ -62,7 +62,9 @@ void OcrObject::getTxtContent(QString &path, QString &textcontent) return; } api->SetImage(image); - textcontent = api->GetUTF8Text(); + char *tmp = api->GetUTF8Text(); + textcontent = QString::fromUtf8(tmp); /* the buffer is UTF-8, so decode it as UTF-8 rather than with the locale codec */ + delete [] tmp; //qDebug() << " Text:" << textcontent; pixDestroy(&image); api->Clear(); diff --git
a/libsearch/index/ocrobject.h b/libsearch/index/ocrobject.h index b686352..2a20536 100644 --- a/libsearch/index/ocrobject.h +++ b/libsearch/index/ocrobject.h @@ -14,7 +14,7 @@ class OcrObject : public QObject public: static OcrObject* getInstance(); - void getTxtContent(QString &path, QString &textcontent); + void getTxtContent(const QString &path, QString &textcontent); protected: explicit OcrObject(QObject *parent = nullptr); diff --git a/libsearch/index/pending-file-queue.cpp b/libsearch/index/pending-file-queue.cpp index a0ee56c..dd93b21 100644 --- a/libsearch/index/pending-file-queue.cpp +++ b/libsearch/index/pending-file-queue.cpp @@ -20,9 +20,10 @@ #include "pending-file-queue.h" #include "file-utils.h" #include +#include "index-status-recorder.h" using namespace UkuiSearch; static PendingFileQueue *global_instance_pending_file_queue = nullptr; -PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent), m_semaphore(INDEX_SEM, 0, QSystemSemaphore::AccessMode::Open) +PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent) { this->start(); @@ -67,8 +68,6 @@ PendingFileQueue::~PendingFileQueue() m_minProcessTimer = nullptr; } global_instance_pending_file_queue = nullptr; - - IndexGenerator::getInstance()->~IndexGenerator(); } void PendingFileQueue::forceFinish() @@ -77,16 +76,13 @@ void PendingFileQueue::forceFinish() this->quit(); this->wait(); processCache(); - m_semaphore.release(1); } void PendingFileQueue::enqueue(const PendingFile &file) { // qDebug() << "enqueuq file: " << file.path(); m_mutex.lock(); m_enqueuetimes++; - if(m_cache.isEmpty()) { - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0"); - } + // Remove all indexs of files under a dir which is to about be deleted,but keep delete signals. // Because our datebase need to delete those indexs one by one. 
if(file.shouldRemoveIndex() && file.isDir()) { @@ -112,7 +108,6 @@ void PendingFileQueue::enqueue(const PendingFile &file) if(!m_cacheTimer->isActive()) { // qDebug()<<"m_cacheTimer-----start!!"; -// m_cacheTimer->start(); Q_EMIT cacheTimerStart(); } Q_EMIT minProcessTimerStart(); @@ -130,41 +125,21 @@ void PendingFileQueue::enqueue(const PendingFile &file) void PendingFileQueue::run() { - //阻塞线程直到first-index进程结束 - m_semaphore.acquire(); exec(); } void PendingFileQueue::processCache() { - qDebug()<< "Begin processCache!" ; + qDebug()<< "PendingFileQueue Begin processCache!" ; + QVector pendingFiles; m_mutex.lock(); - qDebug() << "Events:" << m_enqueuetimes; - m_enqueuetimes = 0; - m_cache.swap(m_pendingFiles); -// m_pendingFiles = m_cache; -// m_cache.clear(); -// m_cache.squeeze(); + m_cache.swap(pendingFiles); m_mutex.unlock(); - qDebug() << "Current process-------------"; - for(PendingFile i : m_pendingFiles) { - qDebug() << "|" << i.path(); - qDebug() << "|" <updateIndex(&m_pendingFiles); - m_mutex.lock(); - if(m_cache.isEmpty()) { - IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2"); - } - m_mutex.unlock(); - m_pendingFiles.clear(); - m_pendingFiles.squeeze(); - malloc_trim(0); - qDebug()<< "Finish processCache!"; return; } diff --git a/libsearch/index/pending-file-queue.h b/libsearch/index/pending-file-queue.h index dea0648..ef3bda3 100644 --- a/libsearch/index/pending-file-queue.h +++ b/libsearch/index/pending-file-queue.h @@ -25,9 +25,7 @@ #include #include #include -#include #include "pending-file.h" -#include "index-generator.h" namespace UkuiSearch { class PendingFileQueue : public QThread @@ -46,22 +44,21 @@ public: protected: void run() override; + Q_SIGNALS: void cacheTimerStart(); void minProcessTimerStart(); void timerStop(); + + void filesUpdate(const QVector&); private: void processCache(); explicit PendingFileQueue(QObject *parent = nullptr); QVector m_cache; - QVector m_pendingFiles; QMutex m_mutex; - QMutex m_timeoutMutex; 
- QSystemSemaphore m_semaphore; QThread *m_timerThread = nullptr; - bool m_timeout = false; int m_enqueuetimes = 0; }; diff --git a/libsearch/index/pending-file.cpp b/libsearch/index/pending-file.cpp index 4cd5dbc..b51dd7d 100644 --- a/libsearch/index/pending-file.cpp +++ b/libsearch/index/pending-file.cpp @@ -42,6 +42,11 @@ void PendingFile::setPath(const QString& path) m_path = path; } +void PendingFile::setIsDir(bool isDir) +{ + m_isDir = isDir; +} + //bool PendingFile::isNewFile() const //{ // return m_created; diff --git a/libsearch/index/pending-file.h b/libsearch/index/pending-file.h index f3a825c..9de7cce 100644 --- a/libsearch/index/pending-file.h +++ b/libsearch/index/pending-file.h @@ -33,12 +33,23 @@ public: QString path() const; void setPath(const QString& path); + QString suffix(); + void setIsDir(){ m_isDir = true; } + void setIsDir(bool isDir); + void setModified() { m_modified = true; } -// void setCreated() { m_created = true; } + bool isModified() { return m_modified; } + + void setCreated() { m_created = true; } + void setDeleted() { m_deleted = true; } + + bool isCreated() const {return m_created;} + bool shouldRemoveIndex() const; // bool shouldIndexContents() const; + bool isDir() const; bool operator == (const PendingFile& rhs) const { @@ -54,7 +65,7 @@ public: private: QString m_path; -// bool m_created : 1; + bool m_created : 1; bool m_deleted : 1; bool m_modified : 1; bool m_isDir : 1; diff --git a/libsearch/index/search-manager.h b/libsearch/index/search-manager.h index 006ae0e..ff45f36 100644 --- a/libsearch/index/search-manager.h +++ b/libsearch/index/search-manager.h @@ -36,7 +36,25 @@ #include #include #include - +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ #include "search-plugin-iface.h" #include "file-utils.h" #include "global-settings.h" diff --git a/libsearch/index/ukui-search-qdbus.cpp b/libsearch/index/ukui-search-qdbus.cpp index 5c6159e..35f2ef7 100644 --- a/libsearch/index/ukui-search-qdbus.cpp +++ b/libsearch/index/ukui-search-qdbus.cpp @@ -36,14 +36,10 @@ UkuiSearchQDBus::~UkuiSearchQDBus() { this->tmpSystemQDBusInterface = nullptr; } -//一键三连 void UkuiSearchQDBus::setInotifyMaxUserWatches() { // /proc/sys/fs/inotify/max_user_watches -// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1"); - // sysctl + this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2"); - // /etc/sysctl.conf - // this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); } void UkuiSearchQDBus::addInotifyUserInstances(int addNum) diff --git a/libsearch/index/writable-database.cpp b/libsearch/index/writable-database.cpp new file mode 100644 index 0000000..efb6c7e --- /dev/null +++ b/libsearch/index/writable-database.cpp @@ -0,0 +1,178 @@ + +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: iaom + * + */ +#include "writable-database.h" +#include +#include +#include +#include "file-utils.h" +#include "index-status-recorder.h" +using namespace UkuiSearch; + +static QMutex g_basicDatabaseMutex; +static QMutex g_contentDatabaseMutex; + +#define DATABASE_TRY(code) try { \ +code; \ +} \ +catch (const Xapian::Error &e)\ +{\ + qWarning() << "Transaction error," << QString::fromStdString(e.get_description()); \ + errorRecord(); \ +} + +WritableDatabase::WritableDatabase(const DataBaseType &type) + : m_type(type) +{ + switch (type) { + case DataBaseType::Basic: + m_path = INDEX_PATH; + m_mutex = &g_basicDatabaseMutex; + break; + case DataBaseType::Content: + m_path = CONTENT_INDEX_PATH; + m_mutex = &g_contentDatabaseMutex; + break; + default: + break; + } + m_mutex->lock(); +} + +WritableDatabase::~WritableDatabase() +{ + if(m_xpDatabase) { + delete m_xpDatabase; + m_xpDatabase = nullptr; + } + m_mutex->unlock(); +} + +bool WritableDatabase::open() +{ + if(m_xpDatabase) { + return true; + } + QDir database(m_path); + if(!database.exists()) { + qDebug() << "Create basic writable database" << m_path<< database.mkpath(m_path); + } + try { + m_xpDatabase = new Xapian::WritableDatabase(m_path.toStdString(), Xapian::DB_CREATE_OR_OPEN); + } catch(const Xapian::Error &e) { + qWarning() << "Open WritableDatabase fail!" 
<< m_path << QString::fromStdString(e.get_description()); + return false; + } + return true; +} + +void WritableDatabase::rebuild() +{ + if(m_xpDatabase) { + delete m_xpDatabase; + m_xpDatabase = nullptr; + } + + QDir database(m_path); + if(database.exists()) { + qDebug() << "Dababase rebuild, remove" << m_path << database.removeRecursively(); + } else { + qDebug() << "Dababase rebuild, create" << m_path << database.mkpath(m_path); + } +} + +void WritableDatabase::beginTransation() +{ + DATABASE_TRY(m_xpDatabase->begin_transaction();) +} + +void WritableDatabase::endTransation() +{ + DATABASE_TRY(m_xpDatabase->commit_transaction();) +} + +void WritableDatabase::cancelTransation() +{ + DATABASE_TRY(m_xpDatabase->cancel_transaction();) +} + +void WritableDatabase::commit() +{ + DATABASE_TRY(m_xpDatabase->commit();) +} + +void WritableDatabase::addDocument(const Document &doc) +{ + DATABASE_TRY(m_xpDatabase->replace_document(doc.getUniqueTerm(), doc.getXapianDocument());\ + m_xpDatabase->set_metadata(doc.getUniqueTerm(), doc.indexTime());) +} + +void WritableDatabase::removeDocument(const QString &path) +{ + DATABASE_TRY(m_xpDatabase->delete_document(FileUtils::makeDocUterm(path));) +} + +void WritableDatabase::removeDocument(const std::string uniqueTerm) +{ + DATABASE_TRY(m_xpDatabase->delete_document(uniqueTerm);) +} + +void WritableDatabase::setMetaData(const QString &key, const QString &value) +{ + DATABASE_TRY(m_xpDatabase->set_metadata(key.toStdString(), value.toStdString());) +} + +void WritableDatabase::setMetaData(const std::string &key, const std::string &value) +{ + DATABASE_TRY(m_xpDatabase->set_metadata(key, value);) +} + +const std::string WritableDatabase::getMetaData(const std::string &key) +{ + std::string value; + DATABASE_TRY(value = m_xpDatabase->get_metadata(key);) + return value; +} + +QMap WritableDatabase::getIndexTimes() +{ + QMap indexTimes; + DATABASE_TRY( + for(Xapian::TermIterator iter = m_xpDatabase->metadata_keys_begin(); + iter != 
m_xpDatabase->metadata_keys_end(); ++iter) { + indexTimes.insert(*iter, m_xpDatabase->get_metadata(*iter)); + }) + return indexTimes; +} + +void WritableDatabase::errorRecord() +{ + switch (m_type) { + case DataBaseType::Basic: + IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Error); + break; + case DataBaseType::Content: + IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Error); + break; + default: + break; + } +} + diff --git a/libsearch/index/writable-database.h b/libsearch/index/writable-database.h new file mode 100644 index 0000000..1f13b38 --- /dev/null +++ b/libsearch/index/writable-database.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2022, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Authors: iaom + * + */ +#ifndef DATABASE_H +#define DATABASE_H +#include +#include +#include "document.h" +#include "common.h" +namespace UkuiSearch { +/** + * @brief The WritableDatabase class\ + * Wrapper for Xapian::WritableDatabase + */ +class WritableDatabase +{ + friend class Transaction; +public: + + WritableDatabase(const DataBaseType &type); + ~WritableDatabase(); + bool open(); + void rebuild(); + + void beginTransation(); + void endTransation(); + void cancelTransation(); + void commit(); + + void addDocument(const Document& doc); + void removeDocument(const QString& path); + void removeDocument(const std::string uniqueTerm); + void setMetaData(const QString& key, const QString& value); + void setMetaData(const std::string& key, const std::string& value); + const std::string getMetaData(const std::string &key); + QMap getIndexTimes(); + +private: + WritableDatabase(const WritableDatabase& rhs) = delete; + void errorRecord(); + + DataBaseType m_type; + QString m_path; + Xapian::WritableDatabase* m_xpDatabase = nullptr; + QMutex *m_mutex = nullptr; +}; +} +#endif // DATABASE_H diff --git a/libsearch/libsearch.h b/libsearch/libsearch.h index a47d520..544338d 100644 --- a/libsearch/libsearch.h +++ b/libsearch/libsearch.h @@ -27,8 +27,6 @@ #include "plugininterface/search-plugin-iface.h" #include "plugininterface/data-queue.h" -#include "index/file-index-manager.h" -#include "index/first-index.h" #include "index/ukui-search-qdbus.h" #include "index/search-manager.h" diff --git a/libsearch/parser/binary-parser.cpp b/libsearch/parser/binary-parser.cpp index a374f4b..53133da 100644 --- a/libsearch/parser/binary-parser.cpp +++ b/libsearch/parser/binary-parser.cpp @@ -4745,12 +4745,13 @@ bool bCreateSmallBlockList(ULONG ulStartblock, const ULONG *aulBBD, size_t tBBDL if(tSmallBlockListLen == 0) { /* There is no small block list */ - aulSmallBlockList = NULL; return true; } /* Create the small block list */ tSize = tSmallBlockListLen * sizeof(ULONG); + 
xfree(aulSmallBlockList); + aulSmallBlockList = NULL; aulSmallBlockList = (ULONG*)xmalloc(tSize); for(iIndex = 0, ulTmp = ulStartblock; iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN; @@ -4979,7 +4980,7 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS, } else { //need more format document ptaucBytes = (UCHAR*)xfree((void*)ptaucBytes); - qWarning() << "Parser error:" << m_strFileName; +// qWarning() << "Parser error:" << m_strFileName; // content.append(QString::fromStdString((char*)ptaucBytes).replace("\r","")); // ptaucBytes = (UCHAR*)xfree((void*)ptaucBytes); } @@ -5057,15 +5058,19 @@ int KBinaryParser:: readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info, if(usOthTxtLen > 0) { memset(chTemp, 0, MAX_BUFF_SIZE); - if(readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0) + if(readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0) { + chData = (UCHAR*)xfree((void*)chData); return -1; + } memcpy(chData + usIdf, chTemp, usOthTxtLen); } if(bTemp) usPartLen --; } else { - if(readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0) + if(readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0) { + chData = (UCHAR*)xfree((void*)chData); break; + } } if(eRrd.bUni) { @@ -5204,8 +5209,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { iToGo -= 127; } - if(!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) + if(!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) { + xfree(aulBbdList); + xfree(aulBBD); return -1; + } aulBbdList = (ULONG*)xfree(aulBbdList); /* Small Block Depot */ @@ -5217,14 +5225,19 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { iIndex++, ulTmp = aulBBD[ulTmp]) { if(ulTmp >= (ULONG)tBBDLen) { qWarning("The Big Block Depot is damaged"); + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; } aulSbdList[iIndex] = ulTmp; } - if(!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) + if(!bGetSBD(pFile, aulSbdList, 
tBBDLen, aulSBD, tSBDLen)) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } aulSbdList = (ULONG*)xfree(aulSbdList); @@ -5232,26 +5245,38 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { for(tRootListLen = 0, ulTmp = ulRootStartblock; tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN; tRootListLen++, ulTmp = aulBBD[ulTmp]) { - if(ulTmp >= (ULONG)tBBDLen) + if(ulTmp >= (ULONG)tBBDLen) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } } - if(tRootListLen == 0) + if(tRootListLen == 0) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } aulRootList = (ULONG*)xcalloc(tRootListLen, sizeof(ULONG)); for(iIndex = 0, ulTmp = ulRootStartblock; iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN; iIndex++, ulTmp = aulBBD[ulTmp]) { - if(ulTmp >= (ULONG)tBBDLen) + if(ulTmp >= (ULONG)tBBDLen) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } aulRootList[iIndex] = ulTmp; } bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info); aulRootList = (ULONG*)xfree(aulRootList); - if(!bSuccess) + if(!bSuccess) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } rdPara readParam; readParam.pFile = pFile; @@ -5262,8 +5287,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { readParam.ulStBlk = PPS_info.tWordDocument.ulSB; UCHAR aucHeader[HEADER_SIZE]; /* Small block list */ - if(!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) + if(!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) { + aulSBD = (ULONG*)xfree(aulSBD); + readParam.ulBBd = (ULONG*)xfree(readParam.ulBBd); return -1; + } if(PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) { readParam.ulBBd = aulSBD; @@ -5271,8 +5299,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { readParam.usBlkSize = SMALL_BLOCK_SIZE; } - if(readData(readParam, aucHeader, 
0, HEADER_SIZE) != 0) + if(readData(readParam, aucHeader, 0, HEADER_SIZE) != 0) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } usIdent = usGetWord(0x00, aucHeader); @@ -5281,8 +5312,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { usIdent != 0xa5dc && /* Word 6 & 7 */ usIdent != 0xa5ec && /* Word 7 & 97 & 98 */ usIdent != 0xa697 && /* Word 7 for oriental languages */ - usIdent != 0xa699) /* Word 7 for oriental languages */ + usIdent != 0xa699) { /* Word 7 for oriental languages */ + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } /* Get the status flags from the header */ usDocStatus = usGetWord(0x0a, aucHeader); @@ -5299,8 +5333,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { readParam.ulStBlk = PPS_info.tWorkBook.ulSB; UCHAR aucHeader[4]; ulong ulOff = 0; - if(readData(readParam, aucHeader, 0, 8) != 0) + if(readData(readParam, aucHeader, 0, 8) != 0) { + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return -1; + } ulOff += 4; USHORT usType = usGetWord(0x00, aucHeader); @@ -5325,6 +5362,8 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { } else { qWarning() << "Unsupport doc type:" << m_strFileName; } + aulSBD = (ULONG*)xfree(aulSBD); + aulBBD = (ULONG*)xfree(aulBBD); return 0; } @@ -5347,7 +5386,10 @@ KBinaryParser::KBinaryParser(QObject *parent) } KBinaryParser::~KBinaryParser() -{} +{ + xfree(aulSmallBlockList); + aulSmallBlockList = NULL; +} bool KBinaryParser::RunParser(QString strFile, QString &content) { FILE* pFile = fopen(strFile.toLocal8Bit().data(), "rb"); diff --git a/libsearch/searchinterface/search-controller.h b/libsearch/searchinterface/search-controller.h index 51918ef..53b219a 100644 --- a/libsearch/searchinterface/search-controller.h +++ b/libsearch/searchinterface/search-controller.h @@ -12,8 +12,6 @@ class ResultItem; class SearchControllerPrivate; /* 
*搜索控制,用于传递搜索条件,搜索唯一ID,以及管理队列等。 - *为树形结构,所有子节点含有智能指针指向父节点。 - *只要有插件还在使用资源,其对应的子节点以及通向根节点上所有的实例就不会被释放,析构时,从下往上由智能指针自动析构 */ class SearchController { diff --git a/tests/file-system-watcher-test.cpp b/tests/file-system-watcher-test.cpp index 88323bf..caa943e 100644 --- a/tests/file-system-watcher-test.cpp +++ b/tests/file-system-watcher-test.cpp @@ -3,7 +3,7 @@ using namespace UkuiSearch; FileSystemWatcherTest::FileSystemWatcherTest(QObject *parent) : QObject(parent) { - m_watcher = new FileSystemWatcher(FileSystemWatcher::WatchEvents(FileSystemWatcher::EventMove | FileSystemWatcher::EventMoveSelf | + m_watcher = new FileSystemWatcher(true, FileSystemWatcher::WatchEvents(FileSystemWatcher::EventMove | FileSystemWatcher::EventMoveSelf | FileSystemWatcher::EventCreate | FileSystemWatcher::EventDelete | FileSystemWatcher::EventDeleteSelf | FileSystemWatcher::EventUnmount | FileSystemWatcher::EventModify | FileSystemWatcher::EventAttributeChange)); @@ -11,18 +11,23 @@ FileSystemWatcherTest::FileSystemWatcherTest(QObject *parent) : QObject(parent) void FileSystemWatcherTest::beginSignalTest() { - m_watcher->addWatch("/home/zpf/图片"); + m_watcher->addWatch("/home/zpf/图片/新建文夹"); connect(m_watcher, &FileSystemWatcher::attributeChanged, [](const QString& fileUrl) { qDebug() << "AttrbuteChanged:" << fileUrl; }); + connect(m_watcher, &FileSystemWatcher::created, [](const QString& fileUrl, bool isDir) { qDebug() << "Created:" << fileUrl << isDir; }); + connect(m_watcher, &FileSystemWatcher::deleted, [](const QString& fileUrl, bool isDir) { qDebug() << "Deleted:" << fileUrl << isDir; }); + connect(m_watcher, &FileSystemWatcher::modified, [](const QString& fileUrl) { qDebug() << "Modified:" << fileUrl; }); + connect(m_watcher, &FileSystemWatcher::moved, - [](const QString& fileUrl, bool isDir) { qDebug() << "Modified:" << fileUrl << isDir; }); + [](const QString& fileUrl, bool isDir) { qDebug() << "moved:" << fileUrl << isDir; }); + connect(m_watcher, &FileSystemWatcher::closedWrite, 
[](const QString& fileUrl) { qDebug() << "ClosedWrite:" << fileUrl; }); } diff --git a/ukui-search-service/qml/IndexMonitor.qml b/ukui-search-service/qml/IndexMonitor.qml new file mode 100644 index 0000000..7ae4791 --- /dev/null +++ b/ukui-search-service/qml/IndexMonitor.qml @@ -0,0 +1,6 @@ +import QtQuick 2.0 +import QtQuick.Window 2.12 + +Window { + +} diff --git a/ukui-search-service/qml/IndexProgressBar.qml b/ukui-search-service/qml/IndexProgressBar.qml new file mode 100644 index 0000000..d4ac4f6 --- /dev/null +++ b/ukui-search-service/qml/IndexProgressBar.qml @@ -0,0 +1,8 @@ +import QtQuick 2.0 +import QtQuick.Controls 2.5 + +Item { + ProgressBar { + + } +} diff --git a/ukui-search-service/qml/qml.pri b/ukui-search-service/qml/qml.pri new file mode 100644 index 0000000..209c524 --- /dev/null +++ b/ukui-search-service/qml/qml.pri @@ -0,0 +1,8 @@ +# 编译qrc资源 +qmlFile.files = $$files($$PWD/*.qml) +RESOURCES += qmlFile + +DISTFILES += \ + $$PWD/IndexMonitor.qml \ + $$PWD/IndexProgressBar.qml + diff --git a/ukui-search-service/ukui-search-service.cpp b/ukui-search-service/ukui-search-service.cpp index f1d4abd..6a7b87c 100644 --- a/ukui-search-service/ukui-search-service.cpp +++ b/ukui-search-service/ukui-search-service.cpp @@ -1,11 +1,14 @@ #include +#include +#include #include "ukui-search-service.h" #include "dir-watcher.h" -#include "common.h" -#include +#include "file-utils.h" +#include "file-indexer-config.h" using namespace UkuiSearch; -UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &applicationName): QtSingleApplication (applicationName, argc, argv) +UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &applicationName) + : QtSingleApplication (applicationName, argc, argv) { qDebug()<<"ukui search service constructor start"; setApplicationVersion(QString("v%1").arg(VERSION)); @@ -15,9 +18,9 @@ UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &app connect(this, 
&QtSingleApplication::messageReceived, [=](QString msg) { this->parseCmd(msg, true); }); - DirWatcher::getDirWatcher(); - initGsettings(); - FileIndexManager::getInstance()->initIndexPathSetFunction(); + qRegisterMetaType("IndexType"); + m_indexScheduler = new IndexScheduler(this); + DirWatcher::getDirWatcher()->initDbusService(); } //parse cmd @@ -28,6 +31,14 @@ UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &app qDebug()<<"ukui search service constructor end"; } +UkuiSearchService::~UkuiSearchService() +{ + if(m_quickView) { + delete m_quickView; + m_quickView = nullptr; + } +} + void UkuiSearchService::parseCmd(QString msg, bool isPrimary) { QCommandLineParser parser; @@ -41,6 +52,9 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary) QCommandLineOption startOption(QStringList()<<"i"<<"index", tr("start or stop file index"), "option"); parser.addOption(startOption); + QCommandLineOption monitorWindow(QStringList()<<"m"<<"monitor", tr("Show index monitor window")); + parser.addOption(monitorWindow); + // QCommandLineOption statusOption(QStringList()<<"s"<<"status", tr("show status of file index service")); // parser.addOption(statusOption); @@ -50,13 +64,17 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary) if(parser.isSet(startOption)) { qDebug() << "options!!!!" 
<< parser.value(startOption); if(parser.value(startOption) == "start") { - indexServiceSwitch(true); + m_indexScheduler->scheduleIndexing(); } else if (parser.value(startOption) == "stop") { - indexServiceSwitch(false); + m_indexScheduler->stop(); } } -// if(parser.isSet(statusOption)) { -// } + + if (parser.isSet(monitorWindow)) { + loadMonitorWindow(); + m_quickView->show(); + return; + } if (parser.isSet(quitOption)) { qApp->quit(); return; @@ -71,54 +89,11 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary) } } -void UkuiSearchService::initGsettings() +void UkuiSearchService::loadMonitorWindow() { - const QByteArray id(UKUI_SEARCH_SCHEMAS); - if(QGSettings::isSchemaInstalled(id)) { - m_SearchGsettings = new QGSettings(id); - connect(m_SearchGsettings, &QGSettings::changed, this, [ = ](const QString &key) { - if(key == SEARCH_METHOD_KEY) { - setSearchMethodByGsettings(); - } - }); - if(m_SearchGsettings->keys().contains(SEARCH_METHOD_KEY)) { - setSearchMethodByGsettings(); - } - } else { - qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!"; - } -} - -void UkuiSearchService::setSearchMethodByGsettings() -{ - bool isIndexSearch = m_SearchGsettings->get(SEARCH_METHOD_KEY).toBool(); - if(isIndexSearch) { - FileUtils::searchMethod = FileUtils::SearchMethod::INDEXSEARCH; - } else { - FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH; - } - FileIndexManager::getInstance()->searchMethod(FileUtils::searchMethod); - -} - -void UkuiSearchService::indexServiceSwitch(bool startIndex) -{ - if(startIndex) { - FileUtils::searchMethod = FileUtils::SearchMethod::INDEXSEARCH; - } else { - FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH; - } - FileIndexManager::getInstance()->searchMethod(FileUtils::searchMethod); - - const QByteArray id(UKUI_SEARCH_SCHEMAS); - if(QGSettings::isSchemaInstalled(id)) { - m_SearchGsettings = new QGSettings(id); - if(m_SearchGsettings->keys().contains(SEARCH_METHOD_KEY)) { - 
m_SearchGsettings->set(SEARCH_METHOD_KEY, startIndex); - } else { - qWarning() << SEARCH_METHOD_KEY << " is not found!"; - } - } else { - qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!"; + if(!m_quickView) { + m_quickView = new QQuickView(); + m_quickView->rootContext()->setContextProperty("monitor", m_monitor); + m_quickView->setSource(m_qmlPath); } } diff --git a/ukui-search-service/ukui-search-service.h b/ukui-search-service/ukui-search-service.h index a9c13a9..c361e97 100644 --- a/ukui-search-service/ukui-search-service.h +++ b/ukui-search-service/ukui-search-service.h @@ -3,10 +3,13 @@ #include #include -#include +#include +#include #include "qtsingleapplication.h" -#include "file-index-manager.h" #include "common.h" +#include "index-scheduler.h" +#include "monitor.h" + namespace UkuiSearch { class UkuiSearchService : public QtSingleApplication @@ -14,14 +17,18 @@ class UkuiSearchService : public QtSingleApplication Q_OBJECT public: UkuiSearchService(int &argc, char *argv[], const QString &applicationName = "ukui-search-service"); + ~UkuiSearchService(); protected Q_SLOTS: void parseCmd(QString msg, bool isPrimary); private: - void initGsettings(); - void setSearchMethodByGsettings(); - void indexServiceSwitch(bool startIndex = true); - QGSettings *m_SearchGsettings; + void loadMonitorWindow(); + + IndexScheduler *m_indexScheduler = nullptr; + Monitor *m_monitor = nullptr; + QQuickView *m_quickView = nullptr; + QString m_qmlPath = "qrc:/qml/IndexMonitor.qml"; + }; } #endif // UKUISEARCHSERVICE_H diff --git a/ukui-search-service/ukui-search-service.pro b/ukui-search-service/ukui-search-service.pro index ecf65b0..a6b327a 100644 --- a/ukui-search-service/ukui-search-service.pro +++ b/ukui-search-service/ukui-search-service.pro @@ -1,4 +1,4 @@ -QT += core gui dbus +QT += core gui dbus quick greaterThan(QT_MAJOR_VERSION, 4): QT += widgets @@ -22,6 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS include(../libsearch/libukui-search-headers.pri) 
include(../3rd-parties/qtsingleapplication/qtsingleapplication.pri) +include(./qml/qml.pri) LIBS += -L$$OUT_PWD/../libchinese-segmentation -lchinese-segmentation \ -L$$OUT_PWD/../libsearch -lukui-search diff --git a/ukui-search.pro b/ukui-search.pro index 9df4816..62bfea8 100644 --- a/ukui-search.pro +++ b/ukui-search.pro @@ -8,7 +8,7 @@ SUBDIRS += $$PWD/libchinese-segmentation \ $$PWD/ukui-search-app-data-service \ $$PWD/ukui-search-service-dir-manager -#SUBDIRS += tests +SUBDIRS += tests # The following define makes your compiler emit warnings if you use # any Qt feature that has been marked deprecated (the exact warnings