优化文件索引操作

This commit is contained in:
iaom 2022-10-26 18:01:40 +08:00
parent fb4eb18e00
commit dedd1ce00e
67 changed files with 2394 additions and 2925 deletions

View File

@ -47,7 +47,6 @@
#include <QTimer>
#include "search-app-widget-plugin/search.h"
#include "index-generator.h"
#include "libsearch.h"
#include "create-index-ask-dialog.h"
#include "search-line-edit.h"

View File

@ -1,8 +1,9 @@
#pragma once
#pragma once
#include <QMap>
#include <QString>
#include <QDir>
namespace UkuiSearch {
#define CONTENT_DATABASE_PATH_SLOT 1
#define CONTENT_DATABASE_SUFFIX_SLOT 2
@ -12,7 +13,6 @@ static const int LABEL_MAX_WIDTH = 300;
static const QString HOME_PATH = QDir::homePath();
static const QString INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/index_data");
static const QString CONTENT_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/content_index_data");
static const QString OCR_INDEX_PATH = HOME_PATH + QStringLiteral("/.config/org.ukui/ukui-search/ocr_index_data");
static const QString FILE_SEARCH_VALUE = QStringLiteral("0");
static const QString DIR_SEARCH_VALUE = QStringLiteral("1");
static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
@ -67,4 +67,23 @@ static const QMap<QString, bool> targetPhotographTypeMap = {
{"jpeg", true} // TODO 待完善,后续改为配置文件
};
//TODO Put things that needed to be put here here.
/**
* @brief The DataBaseType enum
* Selects one of the on-disk index databases.
* Basic:   the database stored under INDEX_PATH.
* Content: the database stored under CONTENT_INDEX_PATH.
*/
enum class DataBaseType {
Basic = 0,
Content = 1
};
/**
* @brief The IndexType enum
* Kind of index being referred to.
* Note: the OCR index is part of the Content index.
*/
enum class IndexType {
Basic,
Contents,
OCR
};
}

View File

@ -218,7 +218,7 @@ void FileUtils::loadHanziTable(const QString &fileName) {
return;
}
QMimeType FileUtils::getMimetype(QString &path) {
QMimeType FileUtils::getMimetype(const QString &path) {
QMimeDatabase mdb;
QMimeType type = mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
@ -488,7 +488,7 @@ QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
* @param path: abs path
* @return docx to QString
*/
void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
void FileUtils::getDocxTextContent(const QString &path, QString &textcontent) {
//fix me :optimized by xpath??
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
@ -548,7 +548,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
*/
}
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
void FileUtils::getPptxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
@ -660,7 +660,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
*/
}
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
@ -724,7 +724,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
*/
}
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
void FileUtils::getPdfTextContent(const QString &path, QString &textcontent) {
Poppler::Document *doc = Poppler::Document::load(path);
if(doc->isLocked()) {
delete doc;
@ -733,15 +733,19 @@ void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
const QRectF qf;
int pageNum = doc->numPages();
for(int i = 0; i < pageNum; ++i) {
textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
break;
Poppler::Page *page = doc->page(i);
if(page) {
textcontent.append(page->text(qf).replace("\n", "").replace("\r", " "));
delete page;
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
break;
}
}
delete doc;
return;
}
void FileUtils::getTxtContent(QString &path, QString &textcontent) {
void FileUtils::getTxtContent(const QString &path, QString &textcontent) {
QFile file(path);
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
return;
@ -935,7 +939,7 @@ QIcon FileUtils::iconFromTheme(const QString &name, const QIcon &iconDefault)
return QIcon::fromTheme(name, iconDefault);
}
bool FileUtils::isOpenXMLFileEncrypted(QString &path)
bool FileUtils::isOpenXMLFileEncrypted(const QString &path)
{
QFile file(path);
file.open(QIODevice::ReadOnly|QIODevice::Text);
@ -954,52 +958,53 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path)
}
}
//todo: only support docx, pptx, xlsx
bool FileUtils::isEncrypedOrUnreadable(QString path)
bool FileUtils::isEncrypedOrUnsupport(const QString& path, const QString& suffix)
{
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
QFileInfo file(path);
QString strsfx = file.suffix();
if(name == "application/zip") {
if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") {
if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") {
return FileUtils::isOpenXMLFileEncrypted(path);
} else if (strsfx == "uot" || strsfx == "uos" || strsfx == "uop") {
} else if (suffix == "uot" || suffix == "uos" || suffix == "uop") {
return false;
} else if (strsfx == "ofd") {
} else if (suffix == "ofd") {
return false;
} else {
return true;
}
} else if(name == "text/plain") {
if(strsfx.endsWith("txt"))
if(suffix.endsWith("txt"))
return false;
return true;
} else if(name == "text/html") {
if(strsfx.endsWith("html"))
if(suffix.endsWith("html"))
return false;
return true;
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
if(strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls" || strsfx == "uof") {
if(suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") {
return false;
}
return true;
} else if(name == "application/pdf") {
if(strsfx == "pdf")
if(suffix == "pdf")
return false;
return true;
} else if(name == "application/xml" || name == "application/uof") {
if(strsfx == "uof") {
if(suffix == "uof") {
return false;
}
return true;
} else if (true == targetPhotographTypeMap[suffix]) {
return !isOcrSupportSize(path);
} else {
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
// qInfo() << "Unsupport format:[" << path << "][" << type.name() << "]";
return true;
}
}
@ -1020,7 +1025,7 @@ bool FileUtils::isOcrSupportSize(QString path)
*/
QImage file(path);
if (file.height() < OCR_MIN_SIZE or file.width() < OCR_MIN_SIZE) {//限制图片像素尺寸
qDebug() << "file:" << path << "is not right size.";
// qDebug() << "file:" << path << "is not right size.";
return false;
} else
return true;
@ -1099,7 +1104,7 @@ QString FileUtils::wrapData(QLabel *p_label, const QString &text)
* <uof: uof:="OBJ16"/>
*
*/
void FileUtils::getUOFTextContent(QString &path, QString &textContent)
void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
{
QFileInfo info(path);
if (!info.exists() || info.isDir()) {
@ -1278,7 +1283,7 @@ inline void FileUtils::findNodeAttr(const QDomElement &elem, QQueue<QString> &na
* @param path
* @param textContent
*/
void FileUtils::getUOF2TextContent(QString &path, QString &textContent)
void FileUtils::getUOF2TextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
@ -1312,7 +1317,7 @@ void FileUtils::getUOF2TextContent(QString &path, QString &textContent)
file.close();
}
void FileUtils::getUOF2PPTContent(QString &path, QString &textContent)
void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
@ -1409,7 +1414,7 @@ inline bool FileUtils::loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, cons
* @param path
* @param textContent
*/
void FileUtils::getOFDTextContent(QString &path, QString &textContent)
void FileUtils::getOFDTextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())

View File

@ -85,24 +85,30 @@ public:
static void loadHanziTable(const QString &fileName);
//parse text,docx.....
static QMimeType getMimetype(QString &path);
static void getDocxTextContent(QString &path, QString &textcontent);
static void getPptxTextContent(QString &path, QString &textcontent);
static void getXlsxTextContent(QString &path, QString &textcontent);
static void getPdfTextContent(QString &path, QString &textcontent);
static void getTxtContent(QString &path, QString &textcontent);
static void getUOFTextContent(QString &path, QString &textContent);
static void getUOF2TextContent(QString &path, QString &textContent);
static void getUOF2PPTContent(QString &path, QString &textContent);
static void getOFDTextContent(QString &path, QString &textContent);
static QMimeType getMimetype(const QString &path);
static void getDocxTextContent(const QString &path, QString &textcontent);
static void getPptxTextContent(const QString &path, QString &textcontent);
static void getXlsxTextContent(const QString &path, QString &textcontent);
static void getPdfTextContent(const QString &path, QString &textcontent);
static void getTxtContent(const QString &path, QString &textcontent);
static void getUOFTextContent(const QString &path, QString &textContent);
static void getUOF2TextContent(const QString &path, QString &textContent);
static void getUOF2PPTContent(const QString &path, QString &textContent);
static void getOFDTextContent(const QString &path, QString &textContent);
static int openFile(QString &path, bool openInDir = false);
static bool copyPath(QString &path);
static QString escapeHtml(const QString &str);
static QString chineseSubString(const std::string &myStr,int start,int length);
static QIcon iconFromTheme(const QString &name, const QIcon &iconDefault);
static bool isOpenXMLFileEncrypted(QString &path);
static bool isEncrypedOrUnreadable(QString path);
static bool isOpenXMLFileEncrypted(const QString &path);
/**
* @brief isEncrypedOrUnsupport
* @param path
* @param suffix
* @return true if file(path) is not a support format for content index.
*/
static bool isEncrypedOrUnsupport(const QString &path, const QString &suffix);
static bool isOcrSupportSize(QString path);
static size_t maxIndexCount;
static unsigned short indexStatus;

View File

@ -27,8 +27,7 @@
#include <QByteArray>
#include <QSocketNotifier>
#include <QThreadPool>
#include "traverse-bfs.h"
#include <QMutex>
namespace UkuiSearch {
class FileSystemWatcherPrivate
@ -39,10 +38,10 @@ public:
~FileSystemWatcherPrivate();
void addWatch(const QStringList &pathList);
void addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList);
QStringList removeWatch(const QString &path);
QString removeWatch(int wd);
void clearAll();
private:
void init();
@ -56,8 +55,6 @@ private:
QSocketNotifier* m_notifier = nullptr;
// wd -> url
QHash<int, QString> m_watchPathHash;
QThreadPool *m_pool = nullptr;
FileSystemWatcher *q = nullptr;
bool m_recursive = true;

View File

@ -39,19 +39,15 @@ FileSystemWatcherPrivate::FileSystemWatcherPrivate(FileSystemWatcher *parent) :
qDebug() << "setInotifyMaxUserWatches end";
init();
m_pool = new QThreadPool;
}
FileSystemWatcherPrivate::~FileSystemWatcherPrivate()
{
close(m_inotifyFd);
if(m_notifier) {
delete m_notifier;
m_notifier = nullptr;
}
if(m_pool) {
delete m_pool;
m_pool = nullptr;
}
}
void FileSystemWatcherPrivate::traverse(QStringList pathList)
@ -66,13 +62,12 @@ void FileSystemWatcherPrivate::traverse(QStringList pathList)
}
QFileInfoList list;
QDir dir;
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot);
while(!queue.empty()) {
dir.setPath(queue.dequeue());
list = dir.entryInfoList();
for(auto i : list) {
if(i.isDir() && (!(i.isSymLink()))) {
if(!(i.isSymLink())) {
queue.enqueue(i.absoluteFilePath());
addWatch(i.absoluteFilePath());
}
@ -95,14 +90,54 @@ void FileSystemWatcherPrivate::addWatch(const QString &path)
void FileSystemWatcherPrivate::addWatch(const QStringList &pathList)
{
QtConcurrent::run(m_pool, [ = ](){
traverse(pathList);
});
traverse(pathList);
}
void FileSystemWatcherPrivate::addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList)
{
QQueue<QString> bfs;
QStringList tmpPathList = pathList;
for(QString blackPath : blackList) {
for(QString path : pathList) {
if(FileUtils::isOrUnder(path, blackPath)) {
tmpPathList.removeOne(path);
}
}
}
for(QString path : tmpPathList) {
addWatch(path);
bfs.enqueue(path);
}
QFileInfoList list;
QDir dir;
QStringList tmpList = blackList;
dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot);
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for(auto i : list) {
bool isBlocked = false;
for(QString path : tmpList) {
if(i.absoluteFilePath() == path) {
isBlocked = true;
tmpList.removeOne(path);
break;
}
}
if(isBlocked)
continue;
if(!(i.isSymLink())) {
addWatch(i.absoluteFilePath());
bfs.enqueue(i.absoluteFilePath());
}
}
}
}
QStringList FileSystemWatcherPrivate::removeWatch(const QString &path)
{
m_pool->waitForDone();
QStringList paths;
for(int wd : m_watchPathHash.keys()) {
QString tmpPath = m_watchPathHash.value(wd);
@ -113,7 +148,6 @@ QStringList FileSystemWatcherPrivate::removeWatch(const QString &path)
}
}
}
return paths;
}
@ -123,6 +157,20 @@ QString FileSystemWatcherPrivate::removeWatch(int wd)
return m_watchPathHash.take(wd);
}
/**
 * @brief Drops every watch and starts over with a fresh inotify instance.
 *
 * Closes the inotify descriptor, destroys the socket notifier, forgets all
 * wd -> path mappings and finally calls init() again.
 */
void FileSystemWatcherPrivate::clearAll()
{
    // -1 is the "not open" sentinel (init() tests for "< 0"), so every
    // non-negative value, including 0, is a descriptor that must be closed.
    if (m_inotifyFd >= 0) {
        close(m_inotifyFd);
        m_inotifyFd = -1;
    }
    // Deleting a null pointer is a no-op, so no guard is required.
    delete m_notifier;
    m_notifier = nullptr;

    m_watchPathHash.clear();
    init();
}
void FileSystemWatcherPrivate::init()
{
if(m_inotifyFd < 0) {
@ -177,14 +225,24 @@ void FileSystemWatcher::addWatch(const QString &path)
d->addWatch(QStringList(path));
}
// Public entry point: forwards both lists unchanged to addWatchWithBlackList() of the private object d.
void FileSystemWatcher::addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList)
{
d->addWatchWithBlackList(pathList, blackList);
}
// Public entry point: forwards to removeWatch(path) of the private object d and returns its result as is.
QStringList FileSystemWatcher::removeWatch(const QString &path)
{
return d->removeWatch(path);
}
// Public entry point: forwards to clearAll() of the private object d.
void FileSystemWatcher::clearAll()
{
d->clearAll();
}
void FileSystemWatcher::eventProcess(int socket)
{
qDebug() << "-----begin event process-----";
// qDebug() << "-----begin event process-----";
int avail;
if (ioctl(socket, FIONREAD, &avail) == EINVAL) {
qWarning() << "Did not receive an entire inotify event.";
@ -214,7 +272,7 @@ void FileSystemWatcher::eventProcess(int socket)
return;
}
qDebug() << "event mask:" << event->mask;
// qDebug() << "event mask:" << event->mask;
QString path;
if (event->mask & (EventDeleteSelf | EventMoveSelf)) {
@ -224,7 +282,7 @@ void FileSystemWatcher::eventProcess(int socket)
}
if(event->mask & EventCreate) {
qDebug() << path << "--EventCreate";
// qDebug() << path << "--EventCreate";
Q_EMIT created(path, event->mask & IN_ISDIR);
if(event->mask & IN_ISDIR && d->m_recursive) {
if(!QFileInfo(path).isSymLink()){
@ -233,34 +291,34 @@ void FileSystemWatcher::eventProcess(int socket)
}
}
if (event->mask & EventDeleteSelf) {
qDebug() << path << "--EventDeleteSelf";
d->removeWatch(event->wd);
Q_EMIT deleted(path, true);
// qDebug() << path << "--EventDeleteSelf";
if(event->mask & IN_ISDIR) {
d->removeWatch(event->wd);
}
Q_EMIT deleted(path, event->mask & IN_ISDIR);
}
if (event->mask & EventDelete) {
qDebug() << path << "--EventDelete";
// qDebug() << path << "--EventDelete";
// we watch all folders recursively. Thus, folder removing is reported in DeleteSelf.
if (!(event->mask & IN_ISDIR)) {
Q_EMIT deleted(path, false);
}
}
if (event->mask & EventModify) {
qDebug() << path << "--EventModify";
// qDebug() << path << "--EventModify";
Q_EMIT modified(path);
}
if (event->mask & EventMoveSelf) {
qDebug() << path << "--EventMoveSelf";
d->removeWatch(event->wd);
Q_EMIT moved(path, true);
//Problematic if the parent is not watched, otherwise
// handled by MoveFrom/MoveTo from the parent
// qDebug() << path << "--EventMoveSelf";
}
if (event->mask & EventMoveFrom) {
qDebug() << path << "--EventMoveFrom";
if (!(event->mask & IN_ISDIR)) {
Q_EMIT moved(path, false);
}
// qDebug() << path << "--EventMoveFrom";
Q_EMIT moved(path, event->mask & IN_ISDIR);
}
if (event->mask & EventMoveTo) {
qDebug() << path << "--EventMoveTo";
// qDebug() << path << "--EventMoveTo";
Q_EMIT created(path, event->mask & IN_ISDIR);
if (event->mask & IN_ISDIR && d->m_recursive) {
if(!QFileInfo(path).isSymLink()){
@ -269,11 +327,11 @@ void FileSystemWatcher::eventProcess(int socket)
}
}
if (event->mask & EventOpen) {
qDebug() << path << "--EventOpen";
// qDebug() << path << "--EventOpen";
Q_EMIT opened(path);
}
if (event->mask & EventUnmount) {
qDebug() << path << "--EventUnmount";
// qDebug() << path << "--EventUnmount";
if (event->mask & IN_ISDIR) {
d->removeWatch(event->wd);
}
@ -284,23 +342,23 @@ void FileSystemWatcher::eventProcess(int socket)
}
}
if (event->mask & EventAttributeChange) {
qDebug() << path << "--EventAttributeChange";
// qDebug() << path << "--EventAttributeChange";
Q_EMIT attributeChanged(path);
}
if (event->mask & EventAccess) {
qDebug() << path << "--EventAccess";
// qDebug() << path << "--EventAccess";
Q_EMIT accessed(path);
}
if (event->mask & EventCloseWrite) {
qDebug() << path << "--EventCloseWrite";
// qDebug() << path << "--EventCloseWrite";
Q_EMIT closedWrite(path);
}
if (event->mask & EventCloseRead) {
qDebug() << path << "--EventCloseRead";
// qDebug() << path << "--EventCloseRead";
Q_EMIT closedRead(path);
}
if (event->mask & EventIgnored) {
qDebug() << path << "--EventIgnored";
// qDebug() << path << "--EventIgnored";
}
i += sizeof(struct inotify_event) + event->len;
}

View File

@ -101,7 +101,9 @@ public:
public Q_SLOTS:
void addWatch(const QStringList &pathList);
void addWatch(const QString &path);
void addWatchWithBlackList(const QStringList &pathList, const QStringList &blackList);
QStringList removeWatch(const QString &path);
void clearAll();
Q_SIGNALS:

View File

@ -0,0 +1,65 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "basic-indexer.h"
#include "file-utils.h"
#include <QFileInfo>
#include <QUrl>
using namespace UkuiSearch;
// Only remembers the path; the document is not filled until index() is called.
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
{
}
bool BasicIndexer::index()
{
QFileInfo info = QFileInfo(m_filePath);
//添加数据
m_document.setData(m_filePath);
//唯一term
m_document.setUniqueTerm(FileUtils::makeDocUterm(m_filePath));
//上层文件夹term用于在上层文件夹删除时删除自己
m_document.addTerm(FileUtils::makeDocUterm(m_filePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
//1-目录, 0-文件
m_document.addValue(1, QString((info.isDir() && (!info.isSymLink())) ? "1" : "0"));
//修改时间
QString time = info.lastModified().toString("yyyyMMddHHmmsszzz");
m_document.setIndexTime(time);
m_document.addSortableSerialiseValue(2, time);
QString indexName = info.fileName().toLower();
QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName());
int i = 0;
int postingCount = 1; //terms post of Xapian document is started from 1!
while(postingCount <= indexName.size()) {
m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount);
++postingCount;
++i;
}
for(QString& s : pinyinTextList) {
i = 0;
while(i < s.size()) {
m_document.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(), postingCount);
++postingCount;
++i;
}
}
return true;
}

View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef BASICINDEXER_H
#define BASICINDEXER_H
#include "document.h"
namespace UkuiSearch {
/**
 * @brief Builds the basic (file name) index document for a single path.
 *
 * Usage: construct with the path, call index(), then fetch the result
 * with document().
 */
class BasicIndexer
{
public:
    /// @param filePath path of the file or directory to index.
    BasicIndexer(const QString& filePath);
    /// Fills the internal document; see the implementation for the return value.
    bool index();
    /// Returns a copy of the document; const so it also works on a const indexer.
    Document document() const { return m_document; }

private:
    QString m_filePath;
    Document m_document;
};
}
#endif // BASICINDEXER_H

View File

@ -1,174 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include "construct-document.h"
#include "file-utils.h"
#include "chinese-segmentation.h"
#include <QDebug>
#include <QThread>
#include <QUrl>
//extern QList<Document> *g_docListForPath;
//extern QMutex g_mutexDocListForPath;
using namespace UkuiSearch;
// Takes ownership of the string list and lets the thread pool delete the task after run().
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list)
    : m_list(std::move(list))
{
    setAutoDelete(true);
}
void ConstructDocumentForPath::run() {
// qDebug()<<"ConstructDocumentForPath";
// if(!UkuiSearch::g_docListForPath)
// UkuiSearch::g_docListForPath = new QVector<Document>;
// qDebug()<<g_docListForPath->size();
QString index_text = m_list.at(0).toLower();
QString sourcePath = m_list.at(1);
Document doc;
//多音字版
//现加入首字母
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".", ""));
// if(!pinyin_text_list.isEmpty())
// {
// for (QString& i : pinyin_text_list){
// i.replace("", " ");
// i = i.simplified();
// }
// doc.setIndexText(pinyin_text_list);
// }
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString("ZEEKERUPTERM" + FileUtils::makeDocUterm(sourcePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
// qDebug()<<"sourcePath"<<sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep);
// qDebug() << "sourcePath ---------------------------: " << sourcePath;
// qDebug() << "sourcePath.section -------------------: " << sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep);
// qDebug() << "ConstructDocumentForPath -- uniqueterm: " << uniqueterm;
// qDebug() << "ConstructDocumentForPath -- upTerm : " << upTerm;
doc.setData(sourcePath);
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(1, m_list.at(2));
doc.addSortableSerialiseValue(2, m_list.at(3));
/* QStringList temp;
// temp.append(index_text);
temp.append(pinyin_text_list)*/;
int i = 0;
int postingCount = 1; //terms post of Xapian document is start from 1!
while(postingCount <= index_text.size()) {
doc.addPosting(QUrl::toPercentEncoding(index_text.at(i)).toStdString(), postingCount);
++postingCount;
++i;
}
for(QString& s : pinyin_text_list) {
i = 0;
while(i < s.size()) {
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(), postingCount);
++postingCount;
++i;
}
}
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
IndexGenerator::g_mutexDocListForPath.lock();
IndexGenerator::g_docListForPath.append(doc);
IndexGenerator::g_mutexDocListForPath.unlock();
// qDebug()<<"ConstructDocumentForPath finish";
return;
}
// Stores the path of the file to index and lets the thread pool delete the task after run().
ConstructDocumentForContent::ConstructDocumentForContent(QString path)
    : m_path(std::move(path))
{
    setAutoDelete(true);
}
void ConstructDocumentForContent::run() {
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
//构造文本索引的document
QString content;
QString suffix;
FileReader::getTextContent(m_path, content, suffix);
Document doc;
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
doc.addValue(1, m_path);
doc.addValue(2, suffix);
if(content.isEmpty()) {
doc.reuireDeleted();
} else {
doc.setData(content);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
for(size_t i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
term.clear();
term.shrink_to_fit();
}
IndexGenerator::g_mutexDocListForContent.lock();
IndexGenerator::g_docListForContent.append(doc);
IndexGenerator::g_mutexDocListForContent.unlock();
content.clear();
content.squeeze();
return;
}
// Stores the path of the image to index and lets the thread pool delete the task after run().
ConstructDocumentForOcr::ConstructDocumentForOcr(QString path)
    : m_path(std::move(path))
{
    setAutoDelete(true);
}
void ConstructDocumentForOcr::run()
{
QString content;
QString suffix;
FileReader::getTextContent(m_path, content, suffix);
Document doc;
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
doc.addTerm("ZEEKERUPTERM" + FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
doc.addValue(1, m_path);
doc.addValue(2, suffix);
if(content.isEmpty()) {
doc.reuireDeleted();
} else {
doc.setData(content);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());
for(size_t i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
term.clear();
term.shrink_to_fit();
}
IndexGenerator::g_mutexDocListForOcr.lock();
IndexGenerator::g_docListForOcr.append(doc);
IndexGenerator::g_mutexDocListForOcr.unlock();
content.clear();
content.squeeze();
}

View File

@ -1,63 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef CONSTRUCTDOCUMENT_H
#define CONSTRUCTDOCUMENT_H
#include <QRunnable>
#include <QVector>
//#include <QMetaObject>
#include "document.h"
#include "index-generator.h"
namespace UkuiSearch {
class IndexGenerator;
class ConstructDocumentForPath : public QRunnable {
public:
explicit ConstructDocumentForPath(QVector<QString> list);
~ConstructDocumentForPath() = default;
protected:
void run();
private:
QVector<QString> m_list;
};
class ConstructDocumentForContent : public QRunnable {
public:
explicit ConstructDocumentForContent(QString path);
~ConstructDocumentForContent() = default;
protected:
void run();
private:
QString m_path;
};
class ConstructDocumentForOcr : public QRunnable {
public:
explicit ConstructDocumentForOcr(QString path);
~ConstructDocumentForOcr() = default;
protected:
void run();
private:
QString m_path;
};
}
#endif // CONSTRUCTDOCUMENT_H

View File

@ -0,0 +1,29 @@
#include "database.h"
using namespace UkuiSearch;
/**
 * @brief Opens the Xapian database that belongs to @p type.
 * @param type DataBaseType::Basic opens INDEX_PATH,
 *             DataBaseType::Content opens CONTENT_INDEX_PATH.
 *
 * For any other value no database is opened and m_database stays null.
 */
Database::Database(const DataBaseType &type)
    : m_database(nullptr) // defined value for the destructor even when no case matches
{
    switch (type) {
    case DataBaseType::Basic:
        m_database = new Xapian::Database(INDEX_PATH.toStdString());
        break;
    case DataBaseType::Content:
        m_database = new Xapian::Database(CONTENT_INDEX_PATH.toStdString());
        break;
    default:
        break;
    }
}
// Releases the Xapian database owned by this object.
Database::~Database()
{
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete m_database;
    m_database = nullptr;
}
// Returns the result of get_doccount() on the opened Xapian database.
// NOTE(review): m_database is dereferenced unconditionally; this assumes the constructor opened a database - confirm.
uint Database::getIndexDocCount()
{
return m_database->get_doccount();
}

View File

@ -0,0 +1,18 @@
#ifndef DATABASE_H
#define DATABASE_H
#include <xapian.h>
#include "common.h"
namespace UkuiSearch {
/**
 * @brief Owner of one opened Xapian index database.
 *
 * NOTE(review): the class owns m_database through a raw pointer and does not
 * declare copy operations, so copying an instance would make two objects
 * delete the same pointer - avoid copies.
 */
class Database
{
public:
    /// @param type selects which index database is opened.
    Database(const DataBaseType &type);
    ~Database();
    /// Document count reported by the opened database.
    uint getIndexDocCount();

private:
    // Null until the constructor opens a database.
    Xapian::Database *m_database = nullptr;
};
}
#endif // DATABASE_H

View File

@ -84,43 +84,31 @@ void Document::setUniqueTerm(QString term) {
return;
m_document.add_term(term.toStdString());
// m_unique_term = new QString(term);
m_unique_term = std::move(term.toStdString());
m_uniqueTerm = std::move(term.toStdString());
}
void Document::setUniqueTerm(std::string term) {
if(term.empty())
return;
m_document.add_term(term);
m_unique_term = term;
m_uniqueTerm = std::move(term);
}
std::string Document::getUniqueTerm() {
// qDebug()<<"m_unique_term!"<<*m_unique_term;
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
return m_unique_term;//.toStdString();
std::string Document::getUniqueTerm() const
{
return m_uniqueTerm;
}
void Document::setIndexText(QStringList indexText) {
// QStringList indexTextList = indexText;
// m_index_text = new QStringList(indexText);
m_index_text = std::move(indexText);
}
QStringList Document::getIndexText() {
return m_index_text;
}
Xapian::Document Document::getXapianDocument() {
Xapian::Document Document::getXapianDocument() const{
return m_document;
}
void Document::reuireDeleted()
void Document::setIndexTime(const QString &time)
{
m_shouldDelete = true;
m_time = time.toStdString();
}
bool Document::isRequiredDeleted()
const std::string Document::indexTime() const
{
return m_shouldDelete;
return m_time;
}

View File

@ -31,15 +31,13 @@ public:
~Document() {}
Document(const Document& other) {
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
m_shouldDelete = other.m_shouldDelete;
m_uniqueTerm = other.m_uniqueTerm;
m_time = other.m_time;
}
void operator=(const Document& other) {
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
m_shouldDelete = other.m_shouldDelete;
m_uniqueTerm = other.m_uniqueTerm;
m_time = other.m_time;
}
void setData(QString &data);
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
@ -51,18 +49,15 @@ public:
void addSortableSerialiseValue(unsigned slot, QString value);
void setUniqueTerm(QString term);
void setUniqueTerm(std::string term);
std::string getUniqueTerm();
void setIndexText(QStringList indexText);
QStringList getIndexText();
Xapian::Document getXapianDocument();
void reuireDeleted();
bool isRequiredDeleted();
std::string getUniqueTerm() const;
Xapian::Document getXapianDocument() const;
void setIndexTime(const QString& time);
const std::string indexTime() const;
private:
Xapian::Document m_document;
QStringList m_index_text;
//QString m_unique_term;
std::string m_unique_term;
bool m_shouldDelete = false;
std::string m_uniqueTerm;
std::string m_time;
};
}

View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "file-content-indexer.h"
#include "file-reader.h"
#include "file-utils.h"
#include "chinese-segmentation.h"
using namespace UkuiSearch;
/**
 * @brief Creates an indexer for a single file.
 * @param filePath Path of the file; stored in m_filePath and read later by index().
 */
fileContentIndexer::fileContentIndexer(const QString &filePath): m_filePath(filePath)
{
}
bool fileContentIndexer::index()
{
QString content;
QFileInfo info(m_filePath);
QString suffix = info.suffix();
FileReader::getTextContent(m_filePath, content, suffix);
if(content.isEmpty()) {
return false;
}
m_document.setData(content);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
content.clear();
content.squeeze();
for(size_t i = 0; i < term.size(); ++i) {
m_document.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
term.clear();
term.shrink_to_fit();
m_document.setUniqueTerm(FileUtils::makeDocUterm(m_filePath));
m_document.addTerm(FileUtils::makeDocUterm(m_filePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
m_document.addValue(1, m_filePath);
m_document.addValue(2, suffix);
m_document.setIndexTime(info.lastModified().toString("yyyyMMddHHmmsszzz"));
return true;
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef FILECONTENTINDEXER_H
#define FILECONTENTINDEXER_H
#include "document.h"
namespace UkuiSearch {
/**
 * @brief Builds the content-index Document for one file.
 *
 * Construct with a file path, call index(), then fetch the result with
 * document().
 */
class fileContentIndexer
{
public:
    /**
     * @param filePath Path of the file to index.
     */
    fileContentIndexer(const QString& filePath);
    /**
     * @brief Extracts the file's text and fills the internal document.
     * @return false when no text could be extracted, true otherwise.
     */
    bool index();
    /**
     * @brief Returns a copy of the document built by index().
     *
     * Declared const because it only reads m_document, so it can also be
     * called through a const reference.
     */
    Document document() const { return m_document; }
private:
    QString m_filePath;
    Document m_document;
};
}
#endif // FILECONTENTINDEXER_H

View File

@ -1,82 +0,0 @@
#include "file-index-manager.h"
#include "dir-watcher.h"
#include "common.h"
using namespace UkuiSearch;
static FileIndexManager* global_instance = nullptr;
/**
 * @brief Creates the manager, constructs the INDEX_SEM system semaphore
 *        (initial value 1, Create mode) and fetches the FirstIndex and
 *        InotifyWatch instances.
 * @param parent Optional QObject parent.
 */
FileIndexManager::FileIndexManager(QObject *parent) : QObject(parent), m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Create)
{
m_fi = FirstIndex::getInstance();
m_iw = InotifyWatch::getInstance();
}
/**
 * @brief Returns the shared FileIndexManager, creating it on the first call.
 * @return Pointer stored in the file-local global_instance.
 */
FileIndexManager *FileIndexManager::getInstance()
{
    // Lazily create the instance; later calls return the same pointer.
    return global_instance ? global_instance
                           : (global_instance = new FileIndexManager());
}
/**
 * @brief Switches between index search and direct search.
 *
 * A valid method is recorded in FileUtils::searchMethod. Selecting index
 * search while FileUtils::indexStatus is 0 rebuilds the database and starts
 * the inotify watcher; selecting direct search stops the watcher.
 *
 * @param sm Requested search method.
 */
void FileIndexManager::searchMethod(FileUtils::SearchMethod sm) {
    qWarning() << "searchMethod start: " << static_cast<int>(sm);

    const bool wantsIndexSearch = (sm == FileUtils::SearchMethod::INDEXSEARCH);
    const bool wantsDirectSearch = (sm == FileUtils::SearchMethod::DIRECTSEARCH);

    if(!wantsIndexSearch && !wantsDirectSearch) {
        qWarning("enum class error!!!\n");
    } else {
        FileUtils::searchMethod = sm;
    }

    if(wantsIndexSearch && FileUtils::indexStatus == 0) {
        qDebug() << "start first index";
        m_fi->rebuildDatebase();
        qDebug() << "start inotify index";
        if(!m_iw->isRunning()) {
            m_iw->start();
        }
        qDebug() << "Search method has been set to INDEXSEARCH";
    }

    if(wantsDirectSearch) {
        m_iw->stopWatch();
    }
    qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod);
}
void FileIndexManager::initIndexPathSetFunction()
{
const QByteArray id(UKUI_SEARCH_SCHEMAS);
if(QGSettings::isSchemaInstalled(id)) {
m_searchSettings = new QGSettings(id);
if(!m_searchSettings->keys().contains(SEARCH_METHOD_KEY)) {
qWarning() << "Can not find gsettings key:" << UKUI_SEARCH_SCHEMAS << SEARCH_METHOD_KEY;
return;
}
} else {
qWarning() << "Can not find gsettings:" << UKUI_SEARCH_SCHEMAS;
return;
}
connect(DirWatcher::getDirWatcher(), &DirWatcher::appendIndexItem, this, &FileIndexManager::handleIndexPathAppend, Qt::QueuedConnection);
connect(DirWatcher::getDirWatcher(), &DirWatcher::removeIndexItem, this, &FileIndexManager::handleRemovePathAppend, Qt::QueuedConnection);
DirWatcher::getDirWatcher()->initDbusService();
}
/**
 * @brief Handles a newly added index path.
 *
 * If the SEARCH_METHOD_KEY setting is currently false it is switched to true
 * and nothing else is done here; otherwise the path is handed to both
 * FirstIndex and InotifyWatch.
 *
 * @param path      Directory that was added.
 * @param blockList Paths to exclude beneath it.
 */
void FileIndexManager::handleIndexPathAppend(const QString path, const QStringList blockList)
{
    qDebug() << "Add Index path:" << path << " blockList:" << blockList;

    const bool indexSearchEnabled = m_searchSettings->get(SEARCH_METHOD_KEY).toBool();
    if(!indexSearchEnabled) {
        m_searchSettings->set(SEARCH_METHOD_KEY, true);
        return;
    }

    m_fi->addIndexPath(path, blockList);
    m_iw->addIndexPath(path, blockList);
}
/**
 * @brief Handles removal of an index path.
 *
 * The path is removed from InotifyWatch; the second argument passed to
 * removeIndexPath is the current boolean value of SEARCH_METHOD_KEY.
 *
 * @param path Directory that was removed.
 */
void FileIndexManager::handleRemovePathAppend(const QString path)
{
    qDebug() << "Remove index path:" << path;
    const bool indexSearchEnabled = m_searchSettings->get(SEARCH_METHOD_KEY).toBool();
    m_iw->removeIndexPath(path, indexSearchEnabled);
}

View File

@ -1,30 +0,0 @@
#ifndef SEARCHMETHODMANAGER_H
#define SEARCHMETHODMANAGER_H
#include <QObject>
#include <QSystemSemaphore>
#include <QGSettings/QGSettings>
#include "first-index.h"
//#include "inotify-index.h"
#include "inotify-watch.h"
namespace UkuiSearch {
/**
 * @brief Singleton that switches the search method and forwards index-path
 *        changes to FirstIndex and InotifyWatch.
 */
class FileIndexManager : public QObject {
Q_OBJECT
public:
// Returns the shared instance, creating it on first use.
static FileIndexManager *getInstance();
// Records the requested search method and starts or stops indexing accordingly.
void searchMethod(FileUtils::SearchMethod sm);
// Opens the GSettings schema and connects the DirWatcher signals to the slots below.
void initIndexPathSetFunction();
private Q_SLOTS:
// Invoked when an index directory is added.
void handleIndexPathAppend(const QString path, const QStringList blockList);
// Invoked when an index directory is removed.
void handleRemovePathAppend(const QString path);
private:
FileIndexManager(QObject *parent = nullptr);
FirstIndex *m_fi;
// InotifyIndex* m_ii;
InotifyWatch *m_iw = nullptr;
// System semaphore constructed with the INDEX_SEM key.
QSystemSemaphore m_semaphore;
// Created in initIndexPathSetFunction(); stays nullptr when the schema is not installed.
QGSettings *m_searchSettings = nullptr;
};
}
#endif // SEARCHMETHODMANAGER_H

View File

@ -0,0 +1,109 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "file-indexer-config.h"
#include <mutex>
#include <QDebug>
#include <QDir>
// Path of the INI file holding the service-side index switches. Parenthesised
// so the expansion stays a single expression wherever the macro is used.
#define INDEX_SETTINGS (QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-service.conf")
// GSettings schema id and the key read from it.
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
static const QString FILE_INDEX_ENABLE_KEY = QStringLiteral("fileIndexEnable");
// Keys read from the INDEX_SETTINGS file.
static const QString CONTENT_INDEX_ENABLE_KEY = QStringLiteral("contentIndexEnable");
static const QString OCR_ENABLE_KEY = QStringLiteral("ocrEnable");
static const QString META_DATA_INDEX_ENABLE_KEY = QStringLiteral("metaDataIndexEnable");
// Guard and storage for the lazily created singleton.
static std::once_flag flag;
static FileIndexerConfig *global_intance = nullptr;
/**
 * @brief Returns the shared FileIndexerConfig.
 *
 * The instance is created exactly once through std::call_once.
 */
FileIndexerConfig *FileIndexerConfig::getInstance()
{
    const auto createInstance = [] {
        global_intance = new FileIndexerConfig();
    };
    std::call_once(flag, createInstance);
    return global_intance;
}
/**
 * @brief Connects the configuration object to DirWatcher and opens both
 *        settings back ends.
 *
 * DirWatcher's appendIndexItem/removeIndexItem signals are re-emitted as
 * appendIndexDir/removeIndexDir. If the GSettings schema is installed, a
 * change of the fileIndexEnable key is re-emitted as
 * fileIndexEnableStatusChanged; otherwise m_gsettings stays nullptr. The INI
 * file at INDEX_SETTINGS is always opened.
 *
 * @param parent Optional QObject parent.
 */
FileIndexerConfig::FileIndexerConfig(QObject *parent)
    : QObject(parent),
      m_dirWatcher(DirWatcher::getDirWatcher())
{
    connect(m_dirWatcher, &DirWatcher::appendIndexItem, this, &FileIndexerConfig::appendIndexDir);
    connect(m_dirWatcher, &DirWatcher::removeIndexItem, this, &FileIndexerConfig::removeIndexDir);

    const QByteArray id(UKUI_SEARCH_SCHEMAS);
    if(QGSettings::isSchemaInstalled(id)) {
        m_gsettings = new QGSettings(id, QByteArray(), this);
        // Only `this` is needed inside the handler, so capture it explicitly.
        connect(m_gsettings, &QGSettings::changed, this, [this](const QString &key) {
            if(key == FILE_INDEX_ENABLE_KEY) {
                Q_EMIT this->fileIndexEnableStatusChanged(m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool());
            }
        });
    } else {
        qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!";
    }
    m_settings = new QSettings(INDEX_SETTINGS, QSettings::IniFormat, this);
}
/**
 * @brief Empty destructor; m_gsettings and m_settings are created with this
 *        object as their QObject parent.
 */
FileIndexerConfig::~FileIndexerConfig()
{
}
/**
 * @brief Returns the directories that are currently indexable.
 * @return Whatever DirWatcher::currentIndexableDir() reports.
 */
QStringList FileIndexerConfig::currentIndexableDir()
{
return DirWatcher::getDirWatcher()->currentIndexableDir();
}
/**
 * @brief Returns the current index black list.
 * @return Whatever DirWatcher::currentBlackListOfIndex() reports.
 */
QStringList FileIndexerConfig::currentBlackListOfIndex()
{
return DirWatcher::getDirWatcher()->currentBlackListOfIndex();
}
/**
 * @brief Reports whether file indexing is enabled in GSettings.
 * @return The boolean value of the fileIndexEnable key; false (with a
 *         warning) when the schema or the key is unavailable.
 */
bool FileIndexerConfig::isFileIndexEnable()
{
    if(!m_gsettings) {
        qWarning() << "FileIndexerConfig:" << UKUI_SEARCH_SCHEMAS << " is not found!";
        return false;
    }

    const bool keyPresent = m_gsettings->keys().contains(FILE_INDEX_ENABLE_KEY);
    if(!keyPresent) {
        qWarning() << "FileIndexerConfig: Can not find key:" << FILE_INDEX_ENABLE_KEY << "in" << UKUI_SEARCH_SCHEMAS;
        return false;
    }

    return m_gsettings->get(FILE_INDEX_ENABLE_KEY).toBool();
}
/**
 * @brief Reports whether content indexing is enabled.
 * @return Value of contentIndexEnable in the INI settings; true when unset.
 */
bool FileIndexerConfig::isContentIndexEnable()
{
    const auto stored = m_settings->value(CONTENT_INDEX_ENABLE_KEY, true);
    return stored.toBool();
}
/**
 * @brief Reports whether OCR is enabled.
 * @return Value of ocrEnable in the INI settings; true when unset.
 */
bool FileIndexerConfig::isOCREnable()
{
    const auto stored = m_settings->value(OCR_ENABLE_KEY, true);
    return stored.toBool();
}
/**
 * @brief Reports whether metadata indexing is enabled.
 * @return Value of metaDataIndexEnable in the INI settings; true when unset.
 */
bool FileIndexerConfig::isMetaDataIndexEnable()
{
    const auto stored = m_settings->value(META_DATA_INDEX_ENABLE_KEY, true);
    return stored.toBool();
}

View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef FILEINDEXERCONFIG_H
#define FILEINDEXERCONFIG_H
#include <QObject>
#include <QSettings>
#include <QGSettings>
#include <QAtomicInt>
#include "dir-watcher.h"
/**
 * @brief Singleton giving access to the file indexer's configuration:
 *        indexable directories, black list and the enable switches.
 */
class FileIndexerConfig : public QObject
{
Q_OBJECT
public:
/**
 * @brief Returns the shared instance, created once on first use.
 */
static FileIndexerConfig* getInstance();
/**
 * @brief currentIndexableDir
 * @return The directories reported by DirWatcher::currentIndexableDir().
 */
QStringList currentIndexableDir();
/**
 * @brief currentBlackListOfIndex
 * @return The black list reported by DirWatcher::currentBlackListOfIndex().
 */
QStringList currentBlackListOfIndex();
/**
 * @brief isFileIndexEnable
 * @return Value of the fileIndexEnable GSettings key; false when the schema
 *         or the key is missing.
 */
bool isFileIndexEnable();
/**
 * @brief isContentIndexEnable
 * @return Value of contentIndexEnable in the INI settings file (default true).
 */
bool isContentIndexEnable();
/**
 * @brief isOCREnable
 * @return Whether OCR is enabled: value of ocrEnable in the INI settings
 *         file (default true).
 */
bool isOCREnable();
/**
 * @brief isMetaDataIndexEnable
 * @return Value of metaDataIndexEnable in the INI settings file (default true).
 */
bool isMetaDataIndexEnable();
Q_SIGNALS:
/**
 * @brief appendIndexDir
 * Re-emission of DirWatcher::appendIndexItem.
 */
void appendIndexDir(const QString&, const QStringList&);
/**
 * @brief removeIndexDir
 * Re-emission of DirWatcher::removeIndexItem.
 */
void removeIndexDir(const QString&);
/**
 * @brief fileIndexEnableStatusChanged
 * Emitted with the new value when the fileIndexEnable GSettings key changes.
 */
void fileIndexEnableStatusChanged(bool);
private:
explicit FileIndexerConfig(QObject *parent = nullptr);
~FileIndexerConfig();
DirWatcher *m_dirWatcher = nullptr;
// Stays nullptr when the GSettings schema is not installed.
QGSettings *m_gsettings = nullptr;
QSettings *m_settings = nullptr;
// NOTE(review): not referenced by any FileIndexerConfig member function shown in this commit - confirm whether it is still needed.
QAtomicInt m_stop;
};
#endif // FILEINDEXERCONFIG_H

View File

@ -22,13 +22,12 @@
#include "binary-parser.h"
#include "ocrobject.h"
using namespace UkuiSearch;
FileReader::FileReader(QObject *parent) : QObject(parent) {
/**
 * @brief Constructs a FileReader; only forwards parent to QObject.
 */
FileReader::FileReader(QObject *parent) : QObject(parent)
{
}
void FileReader::getTextContent(QString path, QString &textContent, QString &suffix) {
QFileInfo file(path);
suffix = file.suffix();
void FileReader::getTextContent(const QString &path, QString &textContent, const QString &suffix)
{
if (suffix == "docx") {
FileUtils::getDocxTextContent(path, textContent);
} else if (suffix == "pptx") {

View File

@ -28,7 +28,7 @@ class FileReader : public QObject {
public:
explicit FileReader(QObject *parent = nullptr);
~FileReader() = default;
static void getTextContent(QString path, QString &textContent, QString &suffix);
static void getTextContent(const QString &path, QString &textContent, const QString &suffix);
};
}

View File

@ -1,3 +1,22 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "file-search-plugin.h"
#include "search-manager.h"
#include <QWidget>

View File

@ -1,3 +1,22 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef FILESEARCHPLUGIN_H
#define FILESEARCHPLUGIN_H

View File

@ -1,358 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include <QFileInfo>
#include <QDebug>
#include <QtConcurrent>
#include <QThread>
#include <chinese-segmentation.h>
#include <QUrl>
#include "file-searcher.h"
#include "global-settings.h"
// Definitions of FileSearcher's static members: three counters that
// onKeywordSearch() increments on every call, and the mutexes it holds
// while doing so.
size_t FileSearcher::uniqueSymbol1 = 0;
size_t FileSearcher::uniqueSymbol2 = 0;
size_t FileSearcher::uniqueSymbol3 = 0;
QMutex FileSearcher::m_mutex1;
QMutex FileSearcher::m_mutex2;
QMutex FileSearcher::m_mutex3;
/**
 * @brief Constructs a FileSearcher; only forwards parent to QObject.
 */
FileSearcher::FileSearcher(QObject *parent) : QObject(parent) {
}
/**
 * @brief Empty destructor; the result queues are supplied by the caller of
 *        onKeywordSearch() and are not released here.
 */
FileSearcher::~FileSearcher() {
}
int FileSearcher::getCurrentIndexCount() {
try {
Xapian::Database db(INDEX_PATH);
return db.get_doccount();
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return 0;
}
}
/**
 * @brief Starts three concurrent searches (files, directories, contents) for
 *        a keyword.
 *
 * Each of the three static counters is incremented under its mutex, the
 * caller's queues are remembered in the m_search_result_* members, and one
 * QtConcurrent task per category is launched. Every task clears its queue
 * and then fetches results in pages of 5 until a page is empty, a page
 * reports -1, or the running total reaches the loop limit (100 for files
 * and directories, 50 for contents).
 *
 * @param keyword             Text to search for.
 * @param searchResultFile    Queue that receives matching file paths.
 * @param searchResultDir     Queue that receives matching directory paths.
 * @param searchResultContent Queue that receives (path, snippets) pairs.
 */
void FileSearcher::onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, QQueue<QPair<QString, QStringList>> *searchResultContent) {
m_mutex1.lock();
++uniqueSymbol1;
m_mutex1.unlock();
m_mutex2.lock();
++uniqueSymbol2;
m_mutex2.unlock();
m_mutex3.lock();
++uniqueSymbol3;
m_mutex3.unlock();
m_search_result_file = searchResultFile;
m_search_result_dir = searchResultDir;
m_search_result_content = searchResultContent;
//file
// NOTE(review): uniqueSymbol1/2/3 are static data members, so naming them in
// the capture lists below presumably does not snapshot their value - confirm.
// NOTE(review): the queues are cleared inside the tasks without holding any
// of the mutexes - confirm this is safe against concurrent readers.
QtConcurrent::run([&, uniqueSymbol1, keyword]() {
if(!m_search_result_file->isEmpty())
m_search_result_file->clear();
int begin = 0;
int num = 5;
int resultCount = 0;
int total = 0;
while(total < 100) {
// Value "0" in slot 1 selects files.
resultCount = keywordSearchfile(uniqueSymbol1, keyword, "0", 1, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
begin += num;
}
return;
});
// Q_EMIT this->resultFile(m_search_result_file);
//dir
QtConcurrent::run([&, uniqueSymbol2, keyword]() {
if(!m_search_result_dir->isEmpty())
m_search_result_dir->clear();
int begin = 0;
int num = 5;
int resultCount = 0;
int total = 0;
while(total < 100) {
// Value "1" in slot 1 selects directories.
resultCount = keywordSearchfile(uniqueSymbol2, keyword, "1", 1, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
begin += num;
}
return;
});
// Q_EMIT this->resultDir(m_search_result_dir);
//content
QtConcurrent::run([&, uniqueSymbol3, keyword]() {
if(!m_search_result_content->isEmpty())
m_search_result_content->clear();
int begin = 0;
int num = 5;
int resultCount = 0;
int total = 0;
while(total < 50) {
resultCount = keywordSearchContent(uniqueSymbol3, keyword, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
begin += num;
}
return;
});
// Q_EMIT this->resultContent(m_search_result_content);
}
/**
 * @brief Runs one page of a file-name search against the database at INDEX_PATH.
 *
 * @param uniqueSymbol Search generation passed through to getResult().
 * @param keyword      Text to search for.
 * @param value        When non-empty, results are restricted to documents
 *                     whose value in @p slot equals it.
 * @param slot         Value slot used for the restriction.
 * @param begin        Offset of the first result to fetch.
 * @param num          Maximum number of results to fetch.
 * @return Number of results in this page, 0 when the page is empty, -1 when
 *         getResult() reports -1 or Xapian raises an error.
 */
int FileSearcher::keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
try {
qDebug() << "--keywordSearchfile start--";
Xapian::Database db(INDEX_PATH);
Xapian::Query query = creatQueryForFileSearch(keyword, db);
Xapian::Enquire enquire(db);
Xapian::Query queryFile;
if(!value.isEmpty()) {
std::string slotValue = value.toStdString();
// A value range whose lower and upper bound are equal acts as an exact match on the slot.
Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, slotValue, slotValue);
queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue);
} else {
queryFile = query;
}
qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description());
enquire.set_query(queryFile);
Xapian::MSet result = enquire.get_mset(begin, num);
int resultCount = result.size();
qDebug() << "keywordSearchfile results count=" << resultCount;
if(result.size() == 0)
return 0;
if(getResult(uniqueSymbol, result, value) == -1)
return -1;
qDebug() << "--keywordSearchfile finish--";
return resultCount;
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchfile finish--";
return -1;
}
}
/**
 * @brief Runs one page of a content search against the database at
 *        CONTENT_INDEX_PATH.
 *
 * The keyword is segmented, the resulting words are joined with spaces and
 * parsed by a QueryParser whose default operator is OP_AND.
 *
 * @param uniqueSymbol Search generation passed through to getContentResult().
 * @param keyword      Text to search for.
 * @param begin        Offset of the first result to fetch.
 * @param num          Maximum number of results to fetch.
 * @return Number of results in this page, 0 when the page is empty, -1 when
 *         getContentResult() reports -1 or Xapian raises an error.
 */
int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin, int num) {
try {
qDebug() << "--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
//Creat a query
std::string words;
for(int i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
}
Xapian::Query query = qp.parse_query(words);
// std::vector<Xapian::Query> v;
// for(int i=0;i<sKeyWord.size();i++)
// {
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
// }
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(begin, num);
int resultCount = result.size();
if(result.size() == 0)
return 0;
qDebug() << "keywordSearchContent results count=" << resultCount;
// The space-joined word list is what getContentResult() uses to build snippets.
if(getContentResult(uniqueSymbol, result, words) == -1)
return -1;
qDebug() << "--keywordSearchContent search finish--";
return resultCount;
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchContent search finish--";
return -1;
}
}
/**
 * @brief Builds the phrase query used for file-name search.
 *
 * The keyword is lower-cased and every character is percent-encoded into its
 * own term; the terms are combined with OP_PHRASE.
 *
 * @param keyword Text to search for.
 * @param db      Not used by the active code (only by commented-out code).
 * @return The phrase query.
 */
Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Database &db) {
// Xapian::QueryParser qp;
// qp.set_default_op(Xapian::Query::OP_PHRASE);
// qp.set_database(db);
auto userInput = keyword.toLower();
// userInput = userInput.replace(".","").simplified();
// userInput = QString(QUrl::toPercentEncoding(userInput)).replace(""," ").simplified();
// std::string queryStr = keyword.replace(".","").replace(" ","").replace(""," ").simplified().toStdString();
// std::string s =db.get_spelling_suggestion(queryStr,10);
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
// qDebug()<<"queryStr!"<<QString::fromStdString(queryStr);
//Creat a query
// Xapian::Query queryPhrase = qp.parse_query(userInput.toStdString(),Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_SYNONYM);
std::vector<Xapian::Query> v;
// One term per character of the lower-cased keyword.
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
// Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
// Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
// qDebug()<<QString::fromStdString(query.get_description());
return queryPhrase;
}
/**
 * @brief Placeholder for building a content-search query; not implemented.
 *
 * The body used to be empty, so control flowed off the end of a function
 * with a non-void return type, which is undefined behaviour. It now returns
 * a default-constructed (empty) query.
 *
 * @param keyword Unused.
 * @param db      Unused.
 * @return An empty Xapian::Query.
 */
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db) {
    Q_UNUSED(keyword)
    Q_UNUSED(db)
    return Xapian::Query();
}
/**
 * @brief Moves one page of file/directory matches into the result queues.
 *
 * Each document's data is taken as the path. Blocked paths are skipped,
 * paths that no longer exist are only logged, and the rest are enqueued:
 * value "1" into the directory queue, value "0" into the file queue.
 *
 * @param uniqueSymbol Generation this search was started with.
 * @param result       Matches to process.
 * @param value        "0" for files, "1" for directories.
 * @return 0 on completion, -1 as soon as @p uniqueSymbol no longer equals the
 *         static counter it is compared with.
 */
int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value) {
//QStringList *pathTobeDelete = new QStringList;
//Delete those path doc which is not already exist.
// QStringList searchResult = QStringList();
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
Xapian::weight docScoreWeight = it.get_weight();
Xapian::percent docScorePercent = it.get_percent();
QString path = QString::fromStdString(data);
if(isBlocked(path))
continue;
QFileInfo info(path);
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << path << "is not exist!!";
} else {
// NOTE(review): the directory branch compares against uniqueSymbol1 and the
// file branch against uniqueSymbol2, while onKeywordSearch() passes
// uniqueSymbol1 for files and uniqueSymbol2 for directories. All counters
// are incremented together there, so the values presumably match - confirm.
switch(value.toInt()) {
case 1:
m_mutex1.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol1) {
m_search_result_dir->enqueue(path);
m_mutex1.unlock();
} else {
m_mutex1.unlock();
return -1;
}
break;
case 0:
m_mutex2.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol2) {
m_search_result_file->enqueue(path);
m_mutex2.unlock();
} else {
m_mutex2.unlock();
return -1;
}
break;
default:
break;
}
// searchResult.append(path);
}
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete)
return 0;
}
/**
 * @brief Moves one page of content matches, with snippets, into the content queue.
 *
 * The path is read from value slot 1. Blocked or no-longer-existing paths
 * are skipped. For the first word of @p keyWord up to 6 snippets are cut
 * from the document data around the term's positions.
 *
 * @param uniqueSymbol Generation this search was started with.
 * @param result       Matches to process.
 * @param keyWord      Space-separated search words.
 * @return 0 on completion, -1 as soon as @p uniqueSymbol no longer equals
 *         uniqueSymbol3.
 */
int FileSearcher::getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord) {
//QStringList *pathTobeDelete = new QStringList;
//Delete those path doc which is not already exist.
// Only the first word is used to locate snippet positions.
QString wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0);
int size = wordTobeFound.size();
int totalSize = QString::fromStdString(keyWord).size();
if(totalSize < 5)
totalSize = 5;
// QMap<QString,QStringList> searchResult;
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
double docScoreWeight = it.get_weight();
Xapian::percent docScorePercent = it.get_percent();
QString path = QString::fromStdString(doc.get_value(1));
if(isBlocked(path))
continue;
QFileInfo info(path);
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << path << "is not exist!!";
continue;
}
// Construct snippets containing keyword.
QStringList snippets;
auto term = doc.termlist_begin();
// NOTE(review): the iterator is used without checking whether skip_to()
// actually landed on wordTobeFound (or on the end) - confirm this is safe.
term.skip_to(wordTobeFound.toStdString());
int count = 0;
for(auto pos = term.positionlist_begin(); pos != term.positionlist_end() && count < 6; ++pos) {
// The position is applied to the raw byte array of the document data.
QByteArray snippetByte = QByteArray::fromStdString(data);
QString snippet = "..." + QString(snippetByte.left(*pos)).right(size + totalSize) + QString(snippetByte.mid(*pos, -1)).left(size + totalSize) + "...";
// qDebug()<<snippet;
snippets.append(snippet);
++count;
}
m_mutex3.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol3) {
m_search_result_content->enqueue(qMakePair(path, snippets));
m_mutex3.unlock();
} else {
m_mutex3.unlock();
return -1;
}
// searchResult.insert(path,snippets);
qDebug() << "path=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete)
return 0;
}
/**
 * @brief Tells whether a path lies under one of the blocked directories.
 *
 * Each entry returned by GlobalSettings::getBlockDirs() is prefixed with "/"
 * before being compared with the start of @p path.
 *
 * @param path Path to test.
 * @return true when @p path starts with any "/"-prefixed block entry.
 */
bool FileSearcher::isBlocked(QString &path) {
    const QStringList blockedDirs = GlobalSettings::getInstance()->getBlockDirs();
    for(const QString &blockedDir : blockedDirs) {
        const QString blockedPrefix = QLatin1Char('/') + blockedDir;
        if(path.startsWith(blockedPrefix)) {
            return true;
        }
    }
    return false;
}

View File

@ -1,82 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef FILESEARCHER_H
#define FILESEARCHER_H
#include <QObject>
#include <xapian.h>
#include <QStandardPaths>
#include <QVector>
#include <QMap>
#include <QQueue>
#include <QPair>
#include <QMutex>
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
/**
 * @brief Searches the Xapian file-name and content indexes for a keyword and
 *        pushes results into caller-supplied queues.
 */
class FileSearcher : public QObject {
Q_OBJECT
public:
explicit FileSearcher(QObject *parent = nullptr);
~FileSearcher();
// Number of documents in the INDEX_PATH database; 0 on a Xapian error.
static int getCurrentIndexCount();
// Counters incremented by every onKeywordSearch() call; result collection
// stops when a counter no longer equals the value a search was given.
static size_t uniqueSymbol1;
static size_t uniqueSymbol2;
static size_t uniqueSymbol3;
// Mutexes locked around accesses to the counters above.
static QMutex m_mutex1;
static QMutex m_mutex2;
static QMutex m_mutex3;
public Q_SLOTS:
// Starts the file, directory and content searches; results go into the three queues.
void onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, QQueue<QPair<QString, QStringList>> *searchResultContent);
Q_SIGNALS:
// NOTE(review): the only emits of these signals in the implementation are commented out - confirm whether they are still used.
void resultFile(QQueue<QString> *);
void resultDir(QQueue<QString> *);
void resultContent(QQueue<QPair<QString, QStringList>> *);
private:
// One page of a file/directory search; returns the page size, 0 or -1.
int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
// One page of a content search; returns the page size, 0 or -1.
int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
/**
 * @brief FileSearcher::creatQueryForFileSearch
 * This part shall be optimized frequently to provide a more stable search function.
 * @param keyword
 * @param db
 * @return Xapian::Query
 */
Xapian::Query creatQueryForFileSearch(QString keyword, Xapian::Database &db);
Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db);
// Enqueue the matches of one page; return -1 when the search is superseded.
int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value);
int getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord);
// True when path starts with a "/"-prefixed entry of the global block list.
bool isBlocked(QString &path);
// Caller-owned queues remembered by onKeywordSearch().
QQueue<QString> *m_search_result_file = nullptr;
QQueue<QString> *m_search_result_dir = nullptr;
QQueue<QPair<QString, QStringList>> *m_search_result_content = nullptr;
bool m_searching = false;
};
#endif // FILESEARCHER_H

View File

@ -0,0 +1,94 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "file-watcher.h"
using namespace UkuiSearch;
/**
 * @brief Creates the watcher and wires it to the pending-file queue.
 *
 * Registers QVector<PendingFile> as a meta type, creates the
 * FileSystemWatcher, fetches the PendingFileQueue instance, connects the
 * watcher's created/modified/deleted/moved signals to the handlers of this
 * class and re-emits the queue's filesUpdate signal.
 *
 * @param parent Optional QObject parent.
 */
FileWatcher::FileWatcher(QObject *parent) : QObject(parent), m_config(FileIndexerConfig::getInstance())
{
qRegisterMetaType<QVector<PendingFile>>("QVector<PendingFile>");
// Created without a parent; released in the destructor.
m_watcher = new FileSystemWatcher();
m_pendingFileQUeue = PendingFileQueue::getInstance();
connect(m_watcher, &FileSystemWatcher::created, this, &FileWatcher::onFileCreated);
connect(m_watcher, &FileSystemWatcher::modified, this, &FileWatcher::onFileModefied);
// Deletions and moves share one handler.
connect(m_watcher, &FileSystemWatcher::deleted, this, &FileWatcher::onFileDeletedOrMoved);
connect(m_watcher, &FileSystemWatcher::moved, this, &FileWatcher::onFileDeletedOrMoved);
connect(m_pendingFileQUeue, &PendingFileQueue::filesUpdate, this, &FileWatcher::filesUpdate);
}
/**
 * @brief Releases the FileSystemWatcher created in the constructor.
 */
FileWatcher::~FileWatcher()
{
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete m_watcher;
    m_watcher = nullptr;
}
/**
 * @brief Starts watching a single path.
 * @param path      Path to watch; wrapped in a one-element list.
 * @param blackList Black list forwarded to addWatchWithBlackList().
 */
void FileWatcher::addWatch(const QString &path, const QStringList &blackList)
{
m_watcher->addWatchWithBlackList(QStringList(path), blackList);
}
void FileWatcher::removeWatch(const QString &path, bool updateIndex)
{
QStringList paths = m_watcher->removeWatch(path);
if(updateIndex) {
for(QString &pathToDelete : paths) {
PendingFile file(pathToDelete);
file.setIsDir();
file.setDeleted();
m_pendingFileQUeue->enqueue(file);
}
}
}
/**
 * @brief Watches every directory currently configured as indexable, using
 *        the configured black list, and logs both lists.
 */
void FileWatcher::installWatches()
{
m_watcher->addWatchWithBlackList(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex());
// NOTE(review): the class declares an installedWatches() signal that is not emitted here - confirm whether it should be.
qDebug() << "Add watch ->" << m_config->currentIndexableDir() << "black list" << m_config->currentBlackListOfIndex();
}
/**
 * @brief Removes all watches by calling clearAll() on the watcher.
 */
void FileWatcher::removeWatch()
{
m_watcher->clearAll();
}
/**
 * @brief Queues a path as deleted; used for both deletions and moves.
 * @param path  Affected path.
 * @param isDir Whether the path is a directory.
 */
void FileWatcher::onFileDeletedOrMoved(const QString &path, bool isDir)
{
PendingFile file(path);
file.setIsDir(isDir);
file.setDeleted();
m_pendingFileQUeue->enqueue(file);
}
/**
 * @brief Queues a path as created.
 * @param path  Affected path.
 * @param isDir Whether the path is a directory.
 */
void FileWatcher::onFileCreated(const QString &path, bool isDir)
{
PendingFile file(path);
file.setIsDir(isDir);
file.setCreated();
m_pendingFileQUeue->enqueue(file);
}
/**
 * @brief Queues a path as modified.
 * @param path Affected path.
 */
void FileWatcher::onFileModefied(const QString &path)
{
PendingFile file(path);
file.setModified();
m_pendingFileQUeue->enqueue(file);
}

View File

@ -0,0 +1,73 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef FILEWATCHER_H
#define FILEWATCHER_H
#include <QObject>
#include "file-system-watcher.h"
#include "file-indexer-config.h"
#include "pending-file-queue.h"
namespace UkuiSearch {
/**
* @brief The FileWatcher class
* Holds a FileSystemWatcher, a FileIndexerConfig and a PendingFileQueue and
* turns file system events into PendingFile entries pushed into that queue.
* NOTE(review): the constructor is not visible here; presumably it sets up the
* three members and connects the private on*() handlers - confirm.
*/
class FileWatcher : public QObject
{
Q_OBJECT
public:
explicit FileWatcher(QObject *parent = nullptr);
~FileWatcher();
public Q_SLOTS:
/**
* @brief addWatch
* Add a watch for a single path.
* @param path the path to watch
* @param blackList paths handed to the watcher as its black list
*/
void addWatch(const QString& path, const QStringList& blackList);
/**
* @brief removeWatch
* Remove the watch for a path.
* @param path the path to stop watching
* @param updateIndex when true, every path the watcher reports as removed is
* queued as a deleted directory
*/
void removeWatch(const QString& path, bool updateIndex = true);
/**
* @brief installWatches
* Add watches for the directories the configuration reports as indexable,
* using the configured black list.
*/
void installWatches();
/**
* @brief removeWatch
* Remove all watches.
*/
void removeWatch();
Q_SIGNALS:
// NOTE(review): neither signal is emitted in the code visible here - confirm
// where they are raised before relying on them.
void filesUpdate(const QVector<PendingFile>&);
void installedWatches();
private:
// Each handler builds a PendingFile for the path and enqueues it.
void onFileCreated(const QString& path, bool isDir);
void onFileModefied(const QString& path);
void onFileDeletedOrMoved(const QString& path, bool isDir);
FileSystemWatcher *m_watcher = nullptr;
FileIndexerConfig *m_config = nullptr;
PendingFileQueue *m_pendingFileQUeue = nullptr;
};
}
#endif // FILEWATCHER_H

View File

@ -1,374 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
//#include <QtConcurrent>
#include "first-index.h"
#include "dir-watcher.h"
#include <QDebug>
/* Needs refactoring:
*
*
*
*/
using namespace UkuiSearch;
FirstIndex *FirstIndex::m_instance = nullptr;
std::once_flag g_firstIndexInstanceFlag;

/**
 * Opens the system semaphore named by INDEX_SEM, wires needRebuild() to
 * rebuildDatebase() through a queued connection and configures the worker pool.
 */
FirstIndex::FirstIndex()
    : m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Open)
{
    connect(this, &FirstIndex::needRebuild, this, &FirstIndex::rebuildDatebase, Qt::QueuedConnection);
    m_pool.setExpiryTimeout(100);
    m_pool.setMaxThreadCount(2);
}
/**
 * Return the process-wide FirstIndex object, creating it on the first call.
 * Creation is guarded by std::call_once.
 */
FirstIndex *FirstIndex::getInstance()
{
    const auto createInstance = [] () {
        m_instance = new FirstIndex;
    };
    std::call_once(g_firstIndexInstanceFlag, createInstance);
    return m_instance;
}
/**
 * Release the three work queues, if they are still allocated.
 */
FirstIndex::~FirstIndex()
{
    qDebug() << "~FirstIndex";

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete m_indexData;
    m_indexData = nullptr;

    delete m_contentIndexData;
    m_contentIndexData = nullptr;

    delete m_ocrIndexData;
    m_ocrIndexData = nullptr;

    qDebug() << "~FirstIndex end";
}
void FirstIndex::work(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->m_indexData->enqueue(QVector<QString>() << fileInfo.fileName()
<< fileInfo.absoluteFilePath()
<< QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")
<< fileInfo.lastModified().toString("yyyyMMddHHmmss"));
if (fileInfo.fileName().split(".", QString::SkipEmptyParts).length() < 2)
return;
if (true == targetFileTypeMap[fileInfo.fileName().split(".").last()]
and false == FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath())) {
if (fileInfo.fileName().split(".").last() == "docx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
file.close();
} else if (fileInfo.fileName().split(".").last() == "pptx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
qint64 fileSize(0);
qint64 fileIndex(0);
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix)){
QString name = prefix + QString::number(fileIndex + 1) + ".xml";
fileIndex++;
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileSize += fileR.usize();
}
}
file.close();
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
} else if (fileInfo.fileName().split(".").last() == "xlsx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
file.close();
} else {
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
}
} else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) {
if (FileUtils::isOcrSupportSize(fileInfo.absoluteFilePath())) {
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
//this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
}
}
}
void FirstIndex::rebuildDatebase()
{
m_semaphore.acquire();
m_isRebuildProcess = true;
this->wait();
this->start();
}
void FirstIndex::addIndexPath(const QString path, const QStringList blockList)
{
m_semaphore.acquire();
m_isRebuildProcess = false;
setPath(QStringList() << path);
setBlockPath(blockList);
this->wait();
this->start();
}
/**
* Thread entry point.
* Reads the recorded status of the basic index, the content index and inotify,
* decides whether there is anything to do, then forks a child process that
* traverses the configured paths and writes both indexes using two pool tasks.
* The parent waits for the child and releases the system semaphore that
* rebuildDatebase() / addIndexPath() acquired.
*/
void FirstIndex::run() {
QTime t1 = QTime::currentTime();
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
// QString ocrIndexDatabaseStatus = IndexStatusRecorder::getInstance()->getStatus(OCR_DATABASE_STATE).toString();
QString inotifyIndexStatus = IndexStatusRecorder::getInstance()->getStatus(INOTIFY_NORMAL_EXIT).toString();
qInfo() << "indexDataBaseStatus: " << indexDataBaseStatus;
qInfo() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus;
// qInfo() << "ocrIndexDatabaseStatus: " << ocrIndexDatabaseStatus;
qInfo() << "inotifyIndexStatus: " << inotifyIndexStatus;
// A recorded value of "2" is the only one treated as "ok" below.
m_inotifyIndexStatus = inotifyIndexStatus == "2" ? true : false;
m_indexDatabaseStatus = indexDataBaseStatus == "2" ? true : false;
m_contentIndexDatabaseStatus = contentIndexDataBaseStatus == "2" ? true : false;
// m_ocrIndexDatabaseStatus = ocrIndexDatabaseStatus == "2" ? true : false;
if(m_inotifyIndexStatus && m_indexDatabaseStatus && m_contentIndexDatabaseStatus /*&& m_ocrIndexDatabaseStatus*/) {
// Everything is ok: a rebuild request has nothing to do.
m_needRebuild = false;
if(m_isRebuildProcess) {
m_isRebuildProcess = false;
m_semaphore.release(1);
return;
}
} else {
if(m_isRebuildProcess) {
// Rebuild: index everything the directory watcher currently reports.
setPath(DirWatcher::getDirWatcher()->currentIndexableDir());
setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex());
} else {
// Adding a directory while exactly one part is broken: finish this run,
// then request a rebuild at the end.
if(m_inotifyIndexStatus && (!m_indexDatabaseStatus || !m_contentIndexDatabaseStatus)) {
m_needRebuild = true;
}
// inotify broken, or both databases broken: hand over to the rebuild procedure now.
if(!m_inotifyIndexStatus || (!m_indexDatabaseStatus && !m_contentIndexDatabaseStatus)) {
m_needRebuild = false;
qInfo() << "Entering rebuild procedure";
Q_EMIT needRebuild();
m_semaphore.release(1);
return;
}
}
}
// Mark the run as in progress; set back to "2" once the child has been waited for.
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");
this->m_indexData = new QQueue<QVector<QString>>();
this->m_contentIndexData = new QQueue<QPair<QString,qint64>>();
// this->m_ocrIndexData = new QQueue<QPair<QString,qint64>>();
++FileUtils::indexStatus;
pid_t pid;
pid = fork();
if(pid == 0) {
// Child process: does the traversal and all database writes, then _exit()s.
prctl(PR_SET_PDEATHSIG, SIGTERM);
prctl(PR_SET_NAME, "first-index");
// Each worker task takes 2 of the 5 permits; acquire(5) below therefore
// blocks until both tasks have released theirs.
QSemaphore sem(5);
QMutex mutex1, mutex2, mutex3;
// NOTE(review): the mutexes are locked here, unlocked inside the tasks and
// locked again below - looks like a "task has started" handshake; confirm.
mutex1.lock();
mutex2.lock();
// mutex3.lock();
// FIXME: using the same D-Bus interface in the child process as in the parent causes problems.
// qInfo() << "index dir" << DirWatcher::getDirWatcher()->currentIndexableDir();
// qInfo() << "index block dir" << DirWatcher::getDirWatcher()->currentBlackListOfIndex();
qInfo() << "index dir" << m_pathList;
qInfo() << "index block dir" << m_blockList;
this->Traverse();
FileUtils::maxIndexCount = this->m_indexData->length();
qDebug() << "max_index_count:" << FileUtils::maxIndexCount;
// Task 1: basic (file name) index.
QtConcurrent::run(&m_pool, [&]() {
sem.acquire(2);
mutex1.unlock();
if(m_isRebuildProcess && m_inotifyIndexStatus && m_indexDatabaseStatus) { // rebuild requested and nothing is abnormal
sem.release(2);
return;
} else if(m_isRebuildProcess) { // rebuild requested and something is abnormal
IndexGenerator::getInstance()->rebuildIndexDatabase();
} else if(!m_inotifyIndexStatus || !m_indexDatabaseStatus) { // adding a directory and something is abnormal
qWarning() << "Index database need rebuild!";
sem.release(2);
return;
}
qDebug() << "index start;" << m_indexData->size();
QQueue<QVector<QString>>* tmp1 = new QQueue<QVector<QString>>();
bool sucess = true;
// Hand the queued records to the generator in batches of at most 8192.
while(!this->m_indexData->empty()) {
for(size_t i = 0; (i < 8192) && (!this->m_indexData->empty()); ++i) {
tmp1->enqueue(this->m_indexData->dequeue());
}
if(!IndexGenerator::getInstance()->creatAllIndex(tmp1)) {
sucess = false;
break;
}
tmp1->clear();
}
delete tmp1;
qDebug() << "index end;";
if(sucess) {
IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "2");
}
sem.release(2);
});
// Task 2: content index.
QtConcurrent::run(&m_pool,[&]() {
sem.acquire(2);
mutex2.unlock();
if(m_isRebuildProcess && m_inotifyIndexStatus && m_contentIndexDatabaseStatus) {
sem.release(2);
return;
} else if(m_isRebuildProcess) { // rebuild requested and something is abnormal
IndexGenerator::getInstance()->rebuildContentIndexDatabase();
} else if(!m_inotifyIndexStatus || !m_contentIndexDatabaseStatus) { // adding a directory and something is abnormal
qWarning() << "Content index database need rebuild!";
sem.release(2);
return;
}
qDebug() << "content index start:" << m_contentIndexData->size();
QQueue<QString>* tmp2 = new QQueue<QString>();
bool sucess = true;
while(!this->m_contentIndexData->empty()) {
qint64 fileSize = 0;
// Batch limit changed from 30 files to a total file size below 50M; 50M is a provisional value --jxx20210519
for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_contentIndexData->empty()); ++i) {
QPair<QString,qint64> tempPair = this->m_contentIndexData->dequeue();
fileSize += tempPair.second;
if (fileSize > 52428800 ) {
// A single file above the limit forms a batch of its own; otherwise
// the file is put back on the queue and the current batch is closed.
if (tmp2->size() == 0) {
tmp2->enqueue(tempPair.first);
break;
}
this->m_contentIndexData->enqueue(tempPair);
break;
}
tmp2->enqueue(tempPair.first);
}
if(!IndexGenerator::getInstance()->creatAllIndex(tmp2)) {
sucess = false;
break;
}
tmp2->clear();
}
delete tmp2;
qDebug() << "content index end;";
if(sucess) {
IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "2");
}
sem.release(2);
});
// OCR is currently merged into the content-search category
// QtConcurrent::run(&m_pool,[&]() {
// sem.acquire(5);
// mutex3.unlock();
// QQueue<QString>* tmpOcr = new QQueue<QString>();
// qDebug() << "m_ocr_index:" << m_ocr_index->size();
// if(m_isFirstIndex && m_allDatadaseStatus && m_contentIndexDatabaseStatus) {
// sem.release(2);
// return;
// }
// IndexGenerator::getInstance()->rebuildOcrIndexDatabase();
// bool sucess = true;
// while(!this->m_ocr_index->empty()) {
// qint64 fileSize = 0;
// // amount of data handled per batch: total file size below 50M; 50M is a provisional value
// for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_ocr_index->empty()); ++i) {
// QPair<QString,qint64> tempPair = this->m_ocr_index->dequeue();
// fileSize += tempPair.second;
// if (fileSize > 52428800) {
// if (tmpOcr->size() == 0) {
// tmpOcr->enqueue(tempPair.first);
// break;
// }
// this->m_ocr_index->enqueue(tempPair);
// break;
// }
// tmpOcr->enqueue(tempPair.first);
// }
// if(!IndexGenerator::getInstance()->creatAllIndex(tmpOcr)) {
// sucess = false;
// break;
// }
// tmpOcr->clear();
// }
// delete tmpOcr;
// qDebug() << "OCR index end;";
// if(sucess) {
// IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "2");
// }
// sem.release(5);
// });
// Blocks until each task has unlocked its mutex, i.e. has taken its permits ...
mutex1.lock();
mutex2.lock();
// mutex3.lock();
// ... so that taking all 5 permits here waits for both tasks to finish.
sem.acquire(5);
mutex1.unlock();
mutex2.unlock();
// mutex3.unlock();
if(this->m_indexData)
delete this->m_indexData;
this->m_indexData = nullptr;
if(this->m_contentIndexData)
delete this->m_contentIndexData;
this->m_contentIndexData = nullptr;
if(this->m_ocrIndexData)
delete this->m_ocrIndexData;
this->m_ocrIndexData = nullptr;
::_exit(0);
} else if(pid < 0) {
// NOTE(review): on fork failure the queues allocated above are not freed
// here and FileUtils::indexStatus is not decremented - confirm intended.
qWarning() << "First Index fork error!!";
} else {
// Parent process: wait for the child to finish its indexing work.
waitpid(pid, NULL, 0);
--FileUtils::indexStatus;
}
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
if(m_needRebuild) {
m_needRebuild = false;
qInfo() << "Entering rebuild procedure";
Q_EMIT needRebuild();
}
m_semaphore.release(1);
// int retval1 = write(fifo_fd, buffer, strlen(buffer));
// if(retval1 == -1) {
// qWarning("write error\n");
// }
// qDebug("write data ok!\n");
// Start and end time of the run are logged at warning level.
QTime t2 = QTime::currentTime();
qWarning() << t1;
qWarning() << t2;
return;
}

View File

@ -1,86 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef FIRSTINDEX_H
#define FIRSTINDEX_H
#include <QThread>
#include <QtConcurrent/QtConcurrent>
#include <signal.h>
#include <QSemaphore>
#include <QSystemSemaphore>
#include<sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <syslog.h>
#include "traverse-bfs.h"
#include "index-status-recorder.h"
#include "index-generator.h"
#include "file-utils.h"
#include "common.h"
namespace UkuiSearch {
/**
* Singleton thread that performs the initial (or rebuild) indexing pass.
* It is a QThread and a TraverseBFS: work() is the per-entry traversal callback
* and run() is the thread body.
*/
class FirstIndex : public QThread, public TraverseBFS
{
Q_OBJECT
public:
// Returns the single instance, creating it on first use.
static FirstIndex* getInstance();
~FirstIndex();
// Traversal callback: queues one file system entry for indexing.
virtual void work(const QFileInfo &) final;
// Acquire the index semaphore and start the thread in rebuild mode.
void rebuildDatebase();
// Acquire the index semaphore and start the thread to index one additional path.
void addIndexPath(const QString path, const QStringList blockList);
Q_SIGNALS:
// Emitted by run() when the databases have to be rebuilt.
void needRebuild();
protected:
void run() override;
private:
FirstIndex();
FirstIndex(const FirstIndex&) = delete;
void operator=(const FirstIndex&) = delete;
static FirstIndex *m_instance;
// Status flags refreshed at the start of every run() from IndexStatusRecorder.
bool m_indexDatabaseStatus = false;
bool m_contentIndexDatabaseStatus = false;
bool m_ocrIndexDatabaseStatus = false;
bool m_inotifyIndexStatus = false;
// true: rebuild everything; false: index a path added via addIndexPath().
bool m_isRebuildProcess = true;
// Set by run() when a rebuild has to be requested after the current run.
bool m_needRebuild = false;
QThreadPool m_pool;
// Records queued for the basic index.
QQueue<QVector<QString>>* m_indexData = nullptr;
// QQueue<QString>* q_content_index;
// Queue element changed to QPair<QString,qint64> so that the file size is stored too, which makes it easy to total sizes while processing --jxx20210519
QQueue<QPair<QString,qint64>>* m_contentIndexData = nullptr;
// OCR queue holding the path and size of pictures that OCR can process
QQueue<QPair<QString,qint64>>* m_ocrIndexData = nullptr;
//xapian will auto commit per 10,000 changes, donnot change it!!!
const size_t u_send_length = 8192;
// System-wide semaphore acquired by rebuildDatebase()/addIndexPath() and released by run().
QSystemSemaphore m_semaphore;
};
}
#endif // FIRSTINDEX_H

View File

@ -0,0 +1,310 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "first-run-indexer.h"
#include <QFileInfo>
#include <QTime>
#include <malloc.h>
#include "file-utils.h"
#include "basic-indexer.h"
#include "file-indexer-config.h"
#include "file-content-indexer.h"
#include "writable-database.h"
using namespace UkuiSearch;
/**
 * @param folders   root folders to scan
 * @param blackList paths that must not be indexed
 * @param stop      stop flag; only its address is stored, so the caller must
 *                  keep it alive for as long as this object is used
 * @param mode      how the databases are to be updated
 * @param target    which indexes to build
 */
FirstRunIndexer::FirstRunIndexer(const QStringList &folders, const QStringList &blackList, QAtomicInt& stop, WorkModes mode, Targets target)
    // Initializers are listed in member declaration order, which is the order
    // in which they actually run (avoids -Wreorder).
    : m_mode(mode),
      m_target(target),
      m_folders(folders),
      m_blackList(blackList),
      m_stop(&stop)
{
}
// Nothing to release: the stop flag is not owned by this object.
FirstRunIndexer::~FirstRunIndexer() = default;
void FirstRunIndexer::run()
{
QTime t = QTime::currentTime();
if(m_target == Target::None) {
return;
}
fetch();
if(m_target & Target::Basic) {
basicIndex();
}
if(m_target & Target::Content) {
contentIndex();
}
m_cache.clear();
malloc_trim(0);
qDebug() << "FirstRunIndexer: time :" << t.elapsed();
Q_EMIT done();
}
void FirstRunIndexer::fetch()
{
qDebug() << "Now begin fetching files to be indexed...";
qDebug() << "Index folders:" << m_folders << "blacklist :" << m_blackList;
QQueue<QString> bfs;
for(QString blockPath : m_blackList) {
for(QString path : m_folders) {
if(FileUtils::isOrUnder(path, blockPath)) {
m_folders.removeOne(path);
}
}
}
m_cache.append(m_folders);
for(QString path : m_folders) {
bfs.enqueue(path);
}
QFileInfoList list;
QDir dir;
QStringList tmpList = m_blackList;
// QDir::Hidden
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for(auto i : list) {
bool isBlocked = false;
for(QString path : tmpList) {
if(i.absoluteFilePath() == path) {
isBlocked = true;
tmpList.removeOne(path);
break;
}
}
if(isBlocked)
continue;
if(i.isDir() && (!(i.isSymLink()))) {
bfs.enqueue(i.absoluteFilePath());
}
m_cache.append(i.absoluteFilePath());
}
}
qDebug() << m_cache.size() << "files founded, start index...";
}
/**
 * Build or refresh the basic index from the paths collected in m_cache.
 *
 * Rebuild: the database is rebuilt and every cached path is indexed.
 * Add:     every cached path is indexed.
 * Update:  only paths whose recorded index time differs from their current
 *          modification time are indexed; documents recorded in the database
 *          but not found in the cache are removed.
 * Progress is reported through progress() and basicIndexDone().
 */
void FirstRunIndexer::basicIndex()
{
    qDebug() << "Begin basic index";
    WritableDatabase basicDb(DataBaseType::Basic);
    if (!basicDb.open()) {
        qWarning() << "Basic db open failed, fail to run basic index!";
        return;
    }

    QStringList filesNeedIndex;
    if (m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
        if (m_mode == WorkMode::Rebuild) {
            basicDb.rebuild();
            if (!basicDb.open()) {
                qWarning() << "basicDb db open failed, fail to run basic index!";
                return;
            }
        }
        filesNeedIndex = m_cache;
        qDebug() <<filesNeedIndex.size() << "files need index.";
    } else if (m_mode == WorkMode::Update) {
        QFileInfo info;
        QMap<std::string, std::string> indexTimes = basicDb.getIndexTimes();
        qDebug() << indexTimes.size() << "documents recorded";
        for (const QString &path : m_cache) {
            info.setFile(path);
            // take() also removes the entry, so whatever remains afterwards
            // belongs to files that no longer exist in the cache.
            const std::string recorded = indexTimes.take(FileUtils::makeDocUterm(path));
            const std::string current = info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString();
            if (recorded != current) {
                filesNeedIndex.append(path);
            }
        }
        if (!indexTimes.isEmpty()) {
            qDebug() << indexTimes.size() << "documents need remove.";
            for (std::string staleTerm : indexTimes.keys()) {
                basicDb.removeDocument(staleTerm);
                basicDb.setMetaData(staleTerm, "");
            }
            basicDb.commit();
        }
        qDebug() << filesNeedIndex.size() << "files need update.";
    }

    uint allSize = filesNeedIndex.size();
    Q_EMIT progress(IndexType::Basic, allSize, 0);

    uint pendingInBatch = 0;
    uint finishNum = 0;
    for (const QString &path : filesNeedIndex) {
        BasicIndexer indexer(path);
        if (indexer.index()) {
            basicDb.addDocument(indexer.document());
            ++pendingInBatch;
            ++finishNum;
        }
        if (pendingInBatch >= 8192) {
            qDebug() << "8192 finished.";
            basicDb.commit();
            Q_EMIT progress(IndexType::Basic, allSize, finishNum);
            pendingInBatch = 0;
        }
    }
    // TODO: Xapian auto-commits every 10000 changes by default; tune this according to memory usage.
    basicDb.commit();
    Q_EMIT progress(IndexType::Basic, allSize, finishNum);
    Q_EMIT basicIndexDone(finishNum);
    filesNeedIndex.clear();
    qDebug() << "Finish basic index";
}
/**
 * Build or refresh the content index from the paths collected in m_cache.
 *
 * Only regular files with a supported suffix are considered; picture suffixes
 * are included when OCR is enabled in the configuration. Encrypted or
 * unsupported files are skipped. In Update mode only files whose recorded
 * index time differs from their modification time are re-indexed and documents
 * left over in the database are removed.
 *
 * Ordinary documents are indexed first, pictures (OCR) afterwards. Both passes
 * commit every 30 documents and stop as soon as the stop flag is set.
 */
void FirstRunIndexer::contentIndex()
{
    qDebug() << "Begin content index";
    if (m_stop->load()) {
        qDebug() << "Index stopped, abort content index.";
        return;
    }
    WritableDatabase contentDb(DataBaseType::Content);
    if (!contentDb.open()) {
        qWarning() << "Content db open failed, fail to run content index!";
        return;
    }

    QStringList filesNeedIndex;
    QStringList filesNeedOCRIndex;
    QMap<QString, bool> suffixMap = targetFileTypeMap;
    QFileInfo info;
    // ocr
    if (FileIndexerConfig::getInstance()->isOCREnable()) {
        qDebug() << "OCR enabled.";
        suffixMap.unite(targetPhotographTypeMap);
    }

    if (m_mode == WorkMode::Rebuild) {
        contentDb.rebuild();
        if (!contentDb.open()) {
            return;
        }
    }

    // value() is used for suffix look-ups: unlike operator[] on a non-const
    // map it does not insert an entry for every unknown suffix.
    if (m_mode == WorkMode::Rebuild || m_mode == WorkMode::Add) {
        for (QString path : m_cache) {
            info.setFile(path);
            if (suffixMap.value(info.suffix()) && info.isFile()) {
                if (!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
                    filesNeedIndex.append(path);
                }
            }
        }
    } else if (m_mode == WorkMode::Update) {
        QMap<std::string, std::string> indexTimes = contentDb.getIndexTimes();
        qDebug() << indexTimes.size() << "documents recorded";
        for (QString path : m_cache) {
            info.setFile(path);
            if (suffixMap.value(info.suffix()) && info.isFile()) {
                std::string uterm = FileUtils::makeDocUterm(path);
                if (indexTimes.value(uterm) != info.lastModified().toString("yyyyMMddHHmmsszzz").toStdString()) {
                    // A changed file that can no longer be indexed stays in
                    // indexTimes and is therefore removed from the database below.
                    if (!FileUtils::isEncrypedOrUnsupport(path, info.suffix())) {
                        filesNeedIndex.append(path);
                        indexTimes.remove(uterm);
                    }
                } else {
                    indexTimes.remove(uterm);
                }
            }
        }
        if (!indexTimes.isEmpty()) {
            qDebug() << indexTimes.size() << "documents need remove";
            for (std::string uniqueTerm : indexTimes.keys()) {
                contentDb.removeDocument(uniqueTerm);
                contentDb.setMetaData(uniqueTerm, "");
            }
            contentDb.commit();
        }
    }

    uint allSize = filesNeedIndex.size();
    qDebug() << allSize << "files need content index.";
    Q_EMIT progress(IndexType::Contents, allSize, 0);

    uint batchSize = 0;
    uint finishNum = 0;
    for (QString path : filesNeedIndex) {
        if (m_stop->load()) {
            qDebug() << "Index stopped, interrupt content index.";
            break;
        }
        info.setFile(path);
        if (targetPhotographTypeMap.value(info.suffix())) {
            // Pictures are deferred to the OCR pass. The list being iterated
            // must not be modified here; it is cleared after the loop anyway.
            filesNeedOCRIndex.append(path);
            continue;
        }
        fileContentIndexer indexer(path);
        if (indexer.index()) {
            contentDb.addDocument(indexer.document());
            ++batchSize;
            ++finishNum;
        } else {
            qDebug() << "Extract fail===" << path;
        }
        if (batchSize >= 30) {
            contentDb.commit();
            qDebug() << "30 finished.";
            Q_EMIT progress(IndexType::Contents, allSize, finishNum);
            batchSize = 0;
        }
    }
    contentDb.commit();
    Q_EMIT progress(IndexType::Contents, allSize, finishNum);
    filesNeedIndex.clear();
    qDebug() << "Content index for normal files finished, now begin OCR index";

    int ocrSize = filesNeedOCRIndex.size();
    qDebug() << ocrSize << "pictures need OCR index.";
    batchSize = 0;
    int ocrFinishNum = 0;
    for (QString path : filesNeedOCRIndex) {
        // Checked before the work so that no picture is processed once a stop
        // has been requested.
        if (m_stop->load()) {
            qDebug() << "Index stopped, interrupt content index.";
            break;
        }
        fileContentIndexer indexer(path);
        if (indexer.index()) {
            contentDb.addDocument(indexer.document());
            ++batchSize;
            ++ocrFinishNum;
        } else {
            qDebug() << "Extract fail===" << path;
        }
        if (batchSize >= 30) {
            contentDb.commit();
            qDebug() << "30 finished.";
            Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum);
            Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum);
            batchSize = 0;
        }
    }
    contentDb.commit();
    Q_EMIT progress(IndexType::OCR, ocrSize, ocrFinishNum);
    Q_EMIT progress(IndexType::Contents, allSize, finishNum + ocrFinishNum);
    filesNeedOCRIndex.clear();
    qDebug() << "Finish OCR index.";
    Q_EMIT contentIndexDone(finishNum + ocrFinishNum);
    qDebug() << "Finish content index";
}

View File

@ -0,0 +1,84 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef FIRSTRUNINDEXER_H
#define FIRSTRUNINDEXER_H
#include <QRunnable>
#include <QObject>
#include <QAtomicInt>
#include "common.h"
namespace UkuiSearch {
/**
* Runnable that scans a set of folders and fills the basic and/or content
* index database, reporting progress through signals.
*/
class FirstRunIndexer : public QObject, public QRunnable
{
Q_OBJECT
public:
/**
* @brief The WorkMode enum
* Update  index only files whose recorded index time differs from their
*         modification time and remove documents that are no longer found
* Add     index every file found under the given folders
* Rebuild rebuild the database, then index every file found
* NOTE(review): these values are plain alternatives (0, 1, 2), not bit flags,
* although they are wrapped with Q_DECLARE_FLAGS; the implementation compares
* the mode with ==.
*/
enum WorkMode{
Update = 0,
Add = 1,
Rebuild
};
Q_DECLARE_FLAGS(WorkModes, WorkMode)
/**
* @brief The Target enum
* Bit flags selecting which indexes run() builds.
* None    build nothing (run() returns immediately)
* Basic   build the basic index
* Content build the content index
* All     build both
*/
enum Target{
None = 0,
Basic = 1u << 0,
Content = 1u << 1,
All = Basic | Content
};
Q_DECLARE_FLAGS(Targets, Target)
/**
* @param folders root folders to scan
* @param blackList paths that must not be indexed
* @param stop stop flag; only its address is stored
* @param mode how the databases are updated
* @param target which indexes are built
*/
FirstRunIndexer(const QStringList& folders, const QStringList& blackList, QAtomicInt& stop, WorkModes mode = WorkMode::Update, Targets target = Target::All);
~FirstRunIndexer();
void run() override;
Q_SIGNALS:
// Progress of one index type: total number of files and number finished so far.
void progress(IndexType type, uint all, uint finished);
// Emitted at the end of the basic index pass with the number of documents added.
void basicIndexDone(int size);
// Emitted at the end of the content index pass with the number of documents added (OCR included).
void contentIndexDone(int size);
// Emitted at the end of run(), except when the target is None.
void done();
private:
// Collects the paths to consider into m_cache.
void fetch();
void basicIndex();
void contentIndex();
WorkModes m_mode;
Targets m_target;
QStringList m_folders;
QStringList m_blackList;
// Paths collected by fetch(); cleared at the end of run().
QStringList m_cache;
// Not owned; points at the flag passed to the constructor.
QAtomicInt *m_stop = nullptr;
};
Q_DECLARE_OPERATORS_FOR_FLAGS(FirstRunIndexer::Targets)
}
#endif // FIRSTRUNINDEXER_H

View File

@ -1,669 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
*
*/
#include <QFile>
#include <QFileInfo>
#include <QDebug>
#include <QtConcurrent>
#include <QFuture>
#include <QThreadPool>
#include <QFile>
#include <QStandardPaths>
#include <malloc.h>
#include "file-utils.h"
#include "index-generator.h"
#include "chinese-segmentation.h"
using namespace UkuiSearch;
// The single IndexGenerator instance, created lazily by getInstance().
static IndexGenerator *global_instance = nullptr;
// Guards creation and destruction of global_instance.
QMutex IndexGenerator::m_mutex;
//QVector<Document> *UkuiSearch::g_docListForPath;
//QMutex UkuiSearch::g_mutexDocListForPath;
//QVector<Document> *UkuiSearch::g_docListForContent;
//QMutex UkuiSearch::g_mutexDocListForContent;
// NOTE(review): presumably each mutex protects the document list of the same name - confirm against the code that fills the lists.
QMutex IndexGenerator::g_mutexDocListForPath;
QMutex IndexGenerator::g_mutexDocListForContent;
QMutex IndexGenerator::g_mutexDocListForOcr;
// Documents prepared for the next write to the path / content / OCR database.
QVector<Document> IndexGenerator::g_docListForPath = QVector<Document>();
QVector<Document> IndexGenerator::g_docListForContent = QVector<Document>();
QVector<Document> IndexGenerator::g_docListForOcr = QVector<Document>();
/**
 * Return the single IndexGenerator, creating it under the class mutex on the
 * first call.
 */
IndexGenerator *IndexGenerator::getInstance()
{
    QMutexLocker guard(&m_mutex);
    if (global_instance == nullptr) {
        global_instance = new IndexGenerator();
    }
    return global_instance;
}
/**
 * File name index: convert the queued records into documents, write them to
 * the path database and commit.
 *
 * @return false when no document was produced or Xapian reported an error
 *         (the index status is then set to "1"), true otherwise.
 */
bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
{
    HandlePathList(messageList);
    if (IndexGenerator::g_docListForPath.isEmpty()) {
        return false;
    }

    qDebug() << "begin creatAllIndex";
    try {
        for (Document doc : IndexGenerator::g_docListForPath) {
            insertIntoDatabase(doc);
        }
        m_database_path->commit();
    } catch (const Xapian::Error &e) {
        qWarning() << "creatAllIndex fail!" << QString::fromStdString(e.get_description());
        // Record the failure in the index status.
        IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1");
        return false;
    }
    qDebug() << "finish creatAllIndex";

    // Empty the list and give its memory back.
    IndexGenerator::g_docListForPath.clear();
    IndexGenerator::g_docListForPath.squeeze();
    QVector<Document>().swap(IndexGenerator::g_docListForPath);
    return true;
}
/**
 * File content index: convert the queued paths into documents and apply them
 * to the content database, committing every 1000 documents and once at the end.
 *
 * Documents flagged as "required deleted" are deleted, all others replace the
 * stored document with the same unique term.
 *
 * @return false when no document was produced or Xapian reported an error
 *         (the content index status is then set to "1"), true otherwise.
 *         transactionFinished() is emitted only on success.
 */
bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
{
    HandlePathList(messageList);
    qDebug() << "begin creatAllIndex for content";
    if (IndexGenerator::g_docListForContent.isEmpty()) {
        return false;
    }
    // The list is known to be non-empty here, so the former `!size == 0` test
    // (which parsed as `(!size) == 0`) was always true and has been dropped.
    int size = IndexGenerator::g_docListForContent.size();
    qDebug() << "begin creatAllIndex for content" << size;
    try {
        int count = 0;
        for (Document i : IndexGenerator::g_docListForContent) {
            if (!i.isRequiredDeleted()) {
                m_database_content->replace_document(i.getUniqueTerm(), i.getXapianDocument());
            } else {
                m_database_content->delete_document(i.getUniqueTerm());
            }
            if (++count > 999) {
                count = 0;
                m_database_content->commit();
            }
        }
        m_database_content->commit();
    } catch (const Xapian::Error &e) {
        qWarning() << "creat content Index fail!" << QString::fromStdString(e.get_description());
        IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "1");
        return false;
    }
    qDebug() << "finish creatAllIndex for content";
    // Empty the list and give its memory back to the system.
    IndexGenerator::g_docListForContent.clear();
    IndexGenerator::g_docListForContent.squeeze();
    QVector<Document>().swap(IndexGenerator::g_docListForContent);
    malloc_trim(0);
    Q_EMIT this->transactionFinished();
    return true;
}
/**
 * OCR index: convert the queued picture paths into documents and apply them to
 * the OCR database, committing every 1000 documents and once at the end.
 *
 * @return false when no document was produced or Xapian reported an error
 *         (the OCR database status is then set to "1"), true otherwise.
 */
bool IndexGenerator::creatOcrIndex(QQueue<QString> *messageList)
{
    HandleOcrPathList(messageList);
    if (IndexGenerator::g_docListForOcr.isEmpty()) {
        return false;
    }
    // The list is known to be non-empty here, so the former `!size == 0` test
    // (which parsed as `(!size) == 0`) was always true and has been dropped.
    int size = IndexGenerator::g_docListForOcr.size();
    qDebug() << "begin creatAllIndex for ocr" << size;
    try {
        int count = 0;
        for (Document i : IndexGenerator::g_docListForOcr) {
            if (!i.isRequiredDeleted()) {
                m_database_ocr->replace_document(i.getUniqueTerm(), i.getXapianDocument());
            } else {
                m_database_ocr->delete_document(i.getUniqueTerm());
            }
            if (++count > 999) {
                count = 0;
                m_database_ocr->commit();
            }
        }
        m_database_ocr->commit();
    } catch (const Xapian::Error &e) {
        qWarning() << "creat ocr Index fail!" << QString::fromStdString(e.get_description());
        IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "1");
        return false;
    }
    qDebug() << "finish creatAllIndex for ocr";
    // Empty the list and give its memory back to the system.
    IndexGenerator::g_docListForOcr.clear();
    IndexGenerator::g_docListForOcr.squeeze();
    QVector<Document>().swap(IndexGenerator::g_docListForOcr);
    malloc_trim(0);
    return true;
}
/**
 * Make sure the index directories exist and open (or create) the path and
 * content databases. The OCR database is not opened here.
 * On a Xapian error the inotify status is set to "1" and the process asserts.
 */
IndexGenerator::IndexGenerator(QObject *parent) : QObject(parent)
{
    // mkpath() is deliberately called outside the qDebug() stream: with
    // QT_NO_DEBUG_OUTPUT the streamed expressions are never evaluated and the
    // directories would not be created.
    QDir database(INDEX_PATH);
    if (!database.exists()) {
        const bool created = database.mkpath(INDEX_PATH);
        qDebug() << "create index path" << INDEX_PATH << created;
    }
    database.setPath(CONTENT_INDEX_PATH);
    if (!database.exists()) {
        const bool created = database.mkpath(CONTENT_INDEX_PATH);
        qDebug() << "create content index path" << CONTENT_INDEX_PATH << created;
    }
    // database.setPath(OCR_INDEX_PATH);
    // if(!database.exists()) {
    //     qDebug() << "create ocr index path" << OCR_INDEX_PATH << database.mkpath(OCR_INDEX_PATH);
    // }
    try {
        m_database_path = new Xapian::WritableDatabase(INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
        m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
        // m_database_ocr = new Xapian::WritableDatabase(OCR_INDEX_PATH.toStdString(), Xapian::DB_CREATE_OR_OPEN);
    } catch (const Xapian::Error &e) {
        qWarning() << "creat Index fail!" << QString::fromStdString(e.get_description());
        IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
        assert(false);
    }
}
/**
 * Destroy the database objects in place, reset the pointers and forget the
 * global instance, all under the class mutex.
 * NOTE(review): the databases are destroyed through explicit destructor calls
 * rather than delete, so their storage does not appear to be freed here -
 * confirm this is intended.
 */
IndexGenerator::~IndexGenerator()
{
    QMutexLocker guard(&m_mutex);
    qDebug() << "~IndexGenerator";

    if (m_database_path != nullptr) {
        m_database_path->~WritableDatabase();
    }
    m_database_path = nullptr;

    if (m_database_content != nullptr) {
        m_database_content->~WritableDatabase();
    }
    if (m_database_ocr != nullptr) {
        m_database_ocr->~WritableDatabase();
    }
    m_database_path = nullptr;
    m_database_content = nullptr;
    m_database_ocr = nullptr;
    global_instance = nullptr;

    qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
    qDebug() << "~IndexGenerator end";
}
/**
 * @brief Discards the basic (file name) index stored at @p path and opens a
 *        fresh writable database there.
 * @param path Directory of the database.
 *
 * An existing directory is removed recursively; a missing one is created.
 * Xapian exceptions from opening the database propagate to the caller.
 */
void IndexGenerator::rebuildIndexDatabase(const QString &path)
{
    QDir database(path);
    // The filesystem calls are kept out of the qDebug() stream: with
    // QT_NO_DEBUG_OUTPUT the streamed operands are never evaluated and the
    // old database would not be removed at all.
    if(database.exists()) {
        const bool removed = database.removeRecursively();
        qDebug() << "remove" << path << removed;
    } else {
        const bool created = database.mkpath(path);
        qDebug() << "create index path" << path << created;
    }
    // delete (not an explicit destructor call) so the old object's storage is
    // freed, and reset the pointer first so a throwing constructor below
    // cannot leave it pointing at a destroyed object.
    delete m_database_path;
    m_database_path = nullptr;
    m_database_path = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
}
/**
 * @brief Discards the content index stored at @p path and opens a fresh
 *        writable database there.
 * @param path Directory of the database.
 *
 * An existing directory is removed recursively; a missing one is created.
 * Xapian exceptions from opening the database propagate to the caller.
 */
void IndexGenerator::rebuildContentIndexDatabase(const QString &path)
{
    QDir database(path);
    // The filesystem calls are kept out of the qDebug() stream: with
    // QT_NO_DEBUG_OUTPUT the streamed operands are never evaluated and the
    // old database would not be removed at all.
    if(database.exists()) {
        const bool removed = database.removeRecursively();
        qDebug() << "remove" << path << removed;
    } else {
        const bool created = database.mkpath(path);
        qDebug() << "create content index path" << path << created;
    }
    // delete (not an explicit destructor call) so the old object's storage is
    // freed, and reset the pointer first so a throwing constructor below
    // cannot leave it pointing at a destroyed object.
    delete m_database_content;
    m_database_content = nullptr;
    m_database_content = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
}
/**
 * @brief Discards the OCR index stored at @p path and opens a fresh writable
 *        database there.
 * @param path Directory of the database.
 *
 * An existing directory is removed recursively; a missing one is created.
 * Xapian exceptions from opening the database propagate to the caller.
 */
void IndexGenerator::rebuildOcrIndexDatabase(const QString &path)
{
    QDir database(path);
    // The filesystem calls are kept out of the qDebug() stream: with
    // QT_NO_DEBUG_OUTPUT the streamed operands are never evaluated and the
    // old database would not be removed at all.
    if(database.exists()) {
        const bool removed = database.removeRecursively();
        qDebug() << "remove" << path << removed;
    } else {
        const bool created = database.mkpath(path);
        qDebug() << "create ocr index path" << path << created;
    }
    // delete (not an explicit destructor call) so the old object's storage is
    // freed, and reset the pointer first so a throwing constructor below
    // cannot leave it pointing at a destroyed object.
    delete m_database_ocr;
    m_database_ocr = nullptr;
    m_database_ocr = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
}
/**
 * @brief Writes one document into the basic (file name) database.
 * @param doc Document to store; it replaces any entry that carries the same
 *            unique term.
 */
void IndexGenerator::insertIntoDatabase(Document& doc) {
    Xapian::Document xapianDoc = doc.getXapianDocument();
    // The docid returned by replace_document() is not needed here.
    m_database_path->replace_document(doc.getUniqueTerm(), xapianDoc);
}
//#define fun(a) a=new ;printf()
/**
 * @brief Writes one document into the content database.
 * @param doc Document to store; it replaces any entry that carries the same
 *            unique term.
 */
void IndexGenerator::insertIntoContentDatabase(Document& doc) {
    // The docid returned by replace_document() is not needed here.
    m_database_content->replace_document(doc.getUniqueTerm(), doc.getXapianDocument());
}
/**
 * @brief Drains @p messageList, handing every entry to a
 *        ConstructDocumentForPath task on a local thread pool, and waits
 *        until all tasks have finished.
 * @param messageList Queue of file descriptions; it is empty on return.
 */
void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList) {
    qDebug() << "Begin HandlePathList!";
    qDebug() << messageList->size();

    QThreadPool workers;
    workers.setMaxThreadCount(((QThread::idealThreadCount() - 1) / 2) + 1);
    workers.setExpiryTimeout(100);

    while(!messageList->isEmpty()) {
        workers.start(new ConstructDocumentForPath(messageList->dequeue()));
    }

    qDebug() << "pool finish" << workers.waitForDone(-1);
    qDebug() << "Finish HandlePathList!";
}
/**
 * @brief Drains @p messageList, handing every path to a
 *        ConstructDocumentForContent task on a single-threaded local pool,
 *        and waits until all tasks have finished.
 * @param messageList Queue of file paths; it is empty on return.
 */
void IndexGenerator::HandlePathList(QQueue<QString> *messageList) {
    qDebug() << "Begin HandlePathList for content index!";
    qDebug() << messageList->size();

    // Touch the segmentation singleton before any task is queued.
    ChineseSegmentation::getInstance();

    QThreadPool workers;
    workers.setMaxThreadCount(1);
    workers.setExpiryTimeout(100);

    while(!messageList->isEmpty()) {
        workers.start(new ConstructDocumentForContent(messageList->dequeue()));
    }

    qDebug() << "pool finish" << workers.waitForDone(-1);
    qDebug() << "Finish HandlePathList for content index!";
}
/**
 * @brief Drains @p messageList, handing every path to a
 *        ConstructDocumentForOcr task on a single-threaded local pool, and
 *        waits until all tasks have finished.
 * @param messageList Queue of image paths; it is empty on return.
 */
void IndexGenerator::HandleOcrPathList(QQueue<QString> *messageList)
{
    qDebug() << "Begin HandlePathList for ocr index!";
    qDebug() << messageList->size();

    QThreadPool pool;
    pool.setMaxThreadCount(1);
    pool.setExpiryTimeout(100);

    while(!messageList->isEmpty()) {
        pool.start(new ConstructDocumentForOcr(messageList->dequeue()));
    }

    qDebug() << "pool finish" << pool.waitForDone(-1);
    // The finish message used to say "content index" (copy/paste slip), which
    // made OCR runs indistinguishable from content runs in the log.
    qDebug() << "Finish HandlePathList for ocr index!";
}
/**
 * @brief Builds the file-name index document for one file. Deprecated.
 * @param list 0 - file name, 1 - full path, 2 - file-or-dir flag.
 * @return Document whose index text is the space-separated file name
 *         followed by its space-separated pinyin variants.
 */
Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
    Document doc;

    QString spacedName = list.at(0);
    QString sourcePath = list.at(1);
    // Replacing the empty string puts a space between all characters.
    spacedName = spacedName.replace("", " ").simplified();

    // Multi-tone pinyin variants (initial letters included), dots removed first.
    QStringList pinyinVariants = FileUtils::findMultiToneWords(QString(list.at(0)).replace(".", ""));
    for(QString &variant : pinyinVariants) {
        variant = variant.replace("", " ").simplified();
    }

    // Note from the original author (zpf): implicit conversions between
    // QByteArray and QString can truncate the string and cause unexpected
    // results.
    QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
    QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));

    doc.setData(sourcePath);
    doc.setUniqueTerm(uniqueterm);
    doc.addTerm(upTerm);
    doc.addValue(1, list.at(2));

    QStringList indexTexts;
    indexTexts << spacedName << pinyinVariants;
    doc.setIndexText(indexTexts);
    return doc;
}
/**
 * @brief Builds the content index document for one file. Deprecated.
 * @param path Path of the file whose text is extracted and segmented.
 * @return Document holding the text as data, the path in value slot 1, the
 *         suffix in value slot 2 and one posting per segmented keyword.
 *
 * The unique term and the parent ("up") term are derived from @p path in the
 * same way GenerateDocument() derives them; previously both were left empty.
 */
Document IndexGenerator::GenerateContentDocument(const QString &path) {
    Document doc;
    QString content;
    QString suffix;
    FileReader::getTextContent(path, content, suffix);
    std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());

    QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
    QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));

    doc.setData(content);
    doc.setUniqueTerm(uniqueterm);
    doc.addTerm(upTerm);
    doc.addValue(1, path);
    doc.addValue(2, suffix);
    for(const KeyWord &keyword : term) {
        doc.addPosting(keyword.word, keyword.offsets, static_cast<int>(keyword.weight));
    }
    return doc;
}
//deprecated
/**
 * @brief Test helper: searches the basic index at INDEX_PATH for @p indexText.
 * @param indexText Text to look for. It is taken by value because it is
 *                  modified in place while the query string is built.
 * @return Data strings (as stored in the matching documents) of every match
 *         that still exists on disk. Xapian errors are caught and logged, in
 *         which case the results collected so far are returned.
 */
QStringList IndexGenerator::IndexSearch(QString indexText) {
QStringList searchResult;
try {
qDebug() << "--search start--";
// Open the database read-only for this single query.
Xapian::Database db(INDEX_PATH.toStdString());
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_PHRASE);
qp.set_database(db);
// Keep an unmodified copy: replace() below changes indexText in place.
auto userInput = indexText;
// Replacing the empty string inserts a space between all characters.
std::string queryStr = indexText.replace("", " ").toStdString();
// std::string s =db.get_spelling_suggestion(queryStr,10);
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
qDebug() << "queryStr!" << QString::fromStdString(queryStr);
//Create a query
Xapian::Query queryPhrase = qp.parse_query(queryStr, Xapian::QueryParser::FLAG_PHRASE);
// One single-term query per character of the original input.
std::vector<Xapian::Query> v;
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QString(userInput.at(i)).toStdString()));
qDebug() << userInput.at(i);
qDebug() << QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
// Final query: the per-character NEAR query AND the parsed phrase query.
Xapian::Query queryNear = Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, queryNear, queryPhrase);
qDebug() << QString::fromStdString(query.get_description());
enquire.set_query(query);
// Fetch at most 9999 matches, starting from the first one.
Xapian::MSet result = enquire.get_mset(0, 9999);
qDebug() << "find results count=" << static_cast<int>(result.get_matches_estimated());
// QStringList *pathTobeDelete = new QStringList;
//get search result
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
Xapian::weight docScoreWeight = it.get_weight();
Xapian::percent docScorePercent = it.get_percent();
// QFileInfo *info = new QFileInfo(QString::fromStdString(data));
// The document data is used as a filesystem path here.
QFileInfo info(QString::fromStdString(data));
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << QString::fromStdString(data) << "is not exist!!";
} else {
searchResult.append(QString::fromStdString(data));
}
qDebug() << "doc=" << QString::fromStdString(data) << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// //Delete those path doc which is not already exist.
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete);
qDebug() << "--search finish--";
} catch(const Xapian::Error &e) {
// Best effort: log the error and return what has been collected.
qDebug() << QString::fromStdString(e.get_description());
}
return searchResult;
}
/**
 * @brief Removes the given paths from both the basic and the content
 *        database, then commits both.
 * @param pathlist Paths to remove. For each path the document with its unique
 *                 term is deleted, as well as every document carrying the
 *                 "ZEEKERUPTERM"-prefixed term (files under it if it is a
 *                 directory).
 * @return true on success or when the list is empty; false if Xapian threw.
 *
 * Emits transactionFinished() after a successful, non-empty run.
 */
bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
    const QStringList &paths = *pathlist;
    if(paths.isEmpty())
        return true;

    try {
        qDebug() << "--delete start--";
        for(const QString &path : paths) {
            const std::string uniqueterm = FileUtils::makeDocUterm(path);
            const std::string upterm = "ZEEKERUPTERM" + FileUtils::makeDocUterm(path);

            m_database_path->delete_document(uniqueterm);
            m_database_content->delete_document(uniqueterm);
            // Also drop everything located under the path if it is a directory.
            m_database_path->delete_document(upterm);
            m_database_content->delete_document(upterm);
            qDebug() << "delete path" << path;
        }
        m_database_path->commit();
        m_database_content->commit();
        qDebug() << "--delete finish--";
    } catch(const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }

    Q_EMIT this->transactionFinished();
    return true;
}
/**
 * @brief Removes the given paths from the content database and commits.
 * @param pathlist Paths whose content documents are deleted.
 * @return true on success or when the list is empty; false if Xapian threw.
 */
bool IndexGenerator::deleteContentIndex(QStringList *pathlist)
{
    const QStringList &paths = *pathlist;
    if(paths.isEmpty())
        return true;

    try {
        qDebug() << "--delete start--";
        for(const QString &path : paths) {
            const std::string uniqueterm = FileUtils::makeDocUterm(path);
            m_database_content->delete_document(uniqueterm);
            qDebug() << "delete path" << path;
        }
        m_database_content->commit();
        qDebug() << "--delete finish--";
    } catch(const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }
    return true;
}
/**
 * @brief Removes the given paths from the OCR database and commits.
 * @param pathlist Paths whose OCR documents are deleted.
 * @return true on success or when the list is empty; false if the OCR
 *         database is not open or Xapian threw.
 */
bool IndexGenerator::deleteOcrIndex(QStringList *pathlist)
{
    if(pathlist->isEmpty())
        return true;
    // The constructor does not open the OCR database, so the pointer stays
    // null unless rebuildOcrIndexDatabase() has been called. Bail out instead
    // of dereferencing a null pointer.
    if(!m_database_ocr) {
        qWarning() << "ocr database is not open, can not delete ocr index";
        return false;
    }
    try {
        qDebug() << "--delete start--";
        for(int i = 0; i < pathlist->size(); i++) {
            QString doc = pathlist->at(i);
            std::string uniqueterm = FileUtils::makeDocUterm(doc);
            m_database_ocr->delete_document(uniqueterm);
            qDebug() << "delete path" << doc;
        }
        m_database_ocr->commit();
        qDebug() << "--delete finish--";
    } catch(const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }
    return true;
}
/**
 * @brief Applies a batch of pending file changes to the indexes.
 * @param pendingFiles Changes to process.
 * @return Always true; the results of the individual create/delete steps are
 *         not propagated.
 *
 * Files flagged for removal are deleted from all indexes. Every other file is
 * (re)indexed by name. Files whose suffix is in targetFileTypeMap are queued
 * for content indexing, or removed from the content index when encrypted or
 * unreadable. Files whose suffix is in targetPhotographTypeMap are queued
 * for content indexing when their size is supported for OCR.
 */
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
{
    // Plain locals instead of new/delete pairs: nothing can leak if one of
    // the steps below throws, and no manual cleanup is required.
    QQueue<QVector<QString>> fileIndexInfo;
    QQueue<QString> fileContentIndexInfo;
    QStringList deleteList;
    QStringList contentDeleteList;

    for (PendingFile file : *pendingFiles) {
        const QString path = file.path();
        if (file.shouldRemoveIndex()) {
            deleteList.append(path);
            continue;
        }

        const QString fileName = path.section("/" , -1);
        fileIndexInfo.append(QVector<QString>() << fileName
                             << path << QString(file.isDir() ? "1" : "0")
                             << QFileInfo(path).lastModified().toString("yyyyMMddHHmmss"));

        // QString::split() always yields at least one element, so last() is
        // safe; the former isEmpty() check on the split result could never
        // trigger and has been dropped.
        const QString suffix = fileName.split(".").last();
        if (targetFileTypeMap[suffix]) {
            if (!FileUtils::isEncrypedOrUnreadable(path)) {
                fileContentIndexInfo.append(path);
            } else {
                contentDeleteList.append(path);
            }
        } else if (targetPhotographTypeMap[suffix]) {
            if (FileUtils::isOcrSupportSize(path)) {
                fileContentIndexInfo.append(path);
            }
        }
    }

    if (!deleteList.isEmpty()) {
        deleteAllIndex(&deleteList);
    }
    if (!contentDeleteList.isEmpty()) {
        deleteContentIndex(&contentDeleteList);
    }
    if (!fileIndexInfo.isEmpty()) {
        creatAllIndex(&fileIndexInfo);
    }
    if (!fileContentIndexInfo.isEmpty()) {
        creatAllIndex(&fileContentIndexInfo);
    }
    return true;
}

View File

@ -1,105 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef INDEXGENERATOR_H
#define INDEXGENERATOR_H
#include <xapian.h>
#include <QObject>
//#include <QtConcurrent/QtConcurrent>
#include <QStringList>
#include <QMap>
#include <QCryptographicHash>
#include <QMutex>
#include <QQueue>
//#include <QMetaObject>
#include "construct-document.h"
#include "index-status-recorder.h"
#include "document.h"
#include "file-reader.h"
#include "common.h"
#include "pending-file.h"
namespace UkuiSearch {
//extern QVector<Document> *_doc_list_path;
//extern QMutex _mutex_doc_list_path;
//extern QVector<Document> *_doc_list_content;
//extern QMutex _mutex_doc_list_content;
/**
 * @brief Creates, updates and deletes entries in the Xapian index databases
 *        (basic file-name index, content index and OCR index).
 *
 * The constructor is private; instances are obtained through getInstance().
 */
class IndexGenerator : public QObject {
// The document construction tasks get access to the private members.
friend class ConstructDocumentForPath;
friend class ConstructDocumentForContent;
friend class ConstructDocumentForOcr;
Q_OBJECT
public:
// Accessor for the instance (the constructor is private).
static IndexGenerator *getInstance();
~IndexGenerator();
// Recreate the respective database at the given path.
void rebuildIndexDatabase(const QString &path = INDEX_PATH);
void rebuildContentIndexDatabase(const QString &path = CONTENT_INDEX_PATH);
void rebuildOcrIndexDatabase(const QString &path = OCR_INDEX_PATH);
// Q_INVOKABLE void appendDocListPath(Document doc);
//for search test
static QStringList IndexSearch(QString indexText);
void setSynonym();
Q_SIGNALS:
void transactionFinished();
void searchFinish();
public Q_SLOTS:
// File-name index entries (one QVector<QString> per file).
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
// Content index entries (one path per file).
bool creatAllIndex(QQueue<QString> *messageList);
bool creatOcrIndex(QQueue<QString> *messageList);
bool deleteAllIndex(QStringList *pathlist);
bool deleteContentIndex(QStringList *pathlist);
bool deleteOcrIndex(QStringList *pathlist);
bool updateIndex(QVector<PendingFile> *pendingFiles);
private:
explicit IndexGenerator(QObject *parent = nullptr);
static QMutex m_mutex;
//For file name index
void HandlePathList(QQueue<QVector<QString> > *messageList);
//For file content index
void HandlePathList(QQueue<QString> *messageList);
//For ocr index
void HandleOcrPathList(QQueue<QString> *messageList);
static Document GenerateDocument(const QVector<QString> &list);
static Document GenerateContentDocument(const QString &list);
//add one data in database
void insertIntoDatabase(Document& doc);
void insertIntoContentDatabase(Document& doc);
// Shared document lists, each paired with its own mutex.
static QVector<Document> g_docListForPath;
static QMutex g_mutexDocListForPath;
static QVector<Document> g_docListForContent;
static QMutex g_mutexDocListForContent;
static QVector<Document> g_docListForOcr;
static QMutex g_mutexDocListForOcr;
QMap<QString, QStringList> m_index_map;
QString m_index_data_path;
// Database handles; null until the corresponding database has been opened.
Xapian::WritableDatabase* m_database_path = nullptr;
Xapian::WritableDatabase* m_database_content = nullptr;
Xapian::WritableDatabase* m_database_ocr = nullptr;
std::string m_docstr;
std::string m_index_text_str;
Xapian::TermGenerator m_indexer;
};
}
#endif // INDEXGENERATOR_H

View File

@ -0,0 +1,217 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "index-scheduler.h"
#include "index-updater.h"
#include "first-run-indexer.h"
using namespace UkuiSearch;
/**
 * @brief Wires the scheduler to the file watcher and the indexer
 *        configuration, then starts indexing if the file index is enabled.
 * @param parent Forwarded to QObject.
 *
 * When the file index is disabled the stop flag is raised instead.
 */
IndexScheduler::IndexScheduler(QObject *parent) :
    QObject(parent),
    m_statusRecorder(IndexStatusRecorder::getInstance()),
    m_config(FileIndexerConfig::getInstance()),
    m_state(Startup),
    m_stop(0)
{
    qRegisterMetaType<IndexerState>("IndexerState");
    // Index jobs run one at a time.
    m_threadPool.setMaxThreadCount(1);

    connect(&m_fileWatcher, &FileWatcher::filesUpdate, this, &IndexScheduler::updateIndex);
    connect(m_config, &FileIndexerConfig::fileIndexEnableStatusChanged, this, &IndexScheduler::fileIndexEnable);
    connect(m_config, &FileIndexerConfig::appendIndexDir, this, &IndexScheduler::addNewPath);
    connect(m_config, &FileIndexerConfig::removeIndexDir, this, &IndexScheduler::removeIndex);

    // m_state is already Startup from the initializer list.
    if(!m_config->isFileIndexEnable()) {
        m_stop.fetchAndStoreRelaxed(1);
        return;
    }
    scheduleIndexing();
}
/**
 * @brief Queues an indexing job for a newly added directory.
 * @param folders   Directory to add (passed to the indexer as a one-element
 *                  list).
 * @param blackList Paths the indexer is told to exclude.
 *
 * Nothing happens while the scheduler is stopped, or when neither the file
 * index nor the content index is enabled.
 */
void IndexScheduler::addNewPath(const QString &folders, const QStringList &blackList)
{
    if(m_stop.load()) {
        qDebug() << "Index Scheduler is being stopped, add operation will be executed when started up next time.";
        return;
    }
    FirstRunIndexer::Targets target = FirstRunIndexer::Target::None;
    if(m_config->isFileIndexEnable()) {
        target |= FirstRunIndexer::Target::Basic;
    }
    if(m_config->isContentIndexEnable()) {
        target |= FirstRunIndexer::Target::Content;
    }
    if(FirstRunIndexer::Target::None == target) {
        // No job is started, so addNewPathFinished() would never run. Leave
        // the bookkeeping untouched; otherwise m_isAddNewPathFinished would
        // stay false and the scheduler could never report Idle again.
        return;
    }
    m_isAddNewPathFinished = false;
    m_state = Running;
    FirstRunIndexer *indexer = new FirstRunIndexer(QStringList(folders), blackList, m_stop, FirstRunIndexer::WorkMode::Add, target);
    connect(indexer, &FirstRunIndexer::done, this, &IndexScheduler::addNewPathFinished, Qt::QueuedConnection);
    m_threadPool.start(indexer);
}
/**
 * @brief Stops watching a directory that was removed from the index
 *        configuration.
 * @param folders Directory passed to FileWatcher::removeWatch().
 *
 * Ignored (with a log message) while the scheduler is stopped.
 */
void IndexScheduler::removeIndex(const QString &folders)
{
    if(!m_stop.load()) {
        m_fileWatcher.removeWatch(folders, true);
        return;
    }
    qDebug() << "Index Scheduler is being stopped, remove operation will be executed when started up next time.";
}
/**
 * @brief Raises the stop flag, removes the file watches, clears the thread
 *        pool queue and announces the Stop state.
 */
void IndexScheduler::stop()
{
    m_stop.fetchAndStoreRelaxed(1);
    m_fileWatcher.removeWatch();
    m_threadPool.clear();

    qDebug() << "Index scheduler has been stopped.";
    m_state = Stop;
    Q_EMIT stateChange(m_state);
}
void IndexScheduler::scheduleIndexing()
{
if(!m_isFirstRunFinished) {
return;
}
m_isFirstRunFinished = false;
m_stop.fetchAndStoreRelaxed(0);
m_state = Running;
Q_EMIT stateChange(m_state);
FirstRunIndexer::Targets rebuiltTarget = checkAndRebuild();
FirstRunIndexer::WorkModes mode = FirstRunIndexer::WorkMode::Update;
FirstRunIndexer::Targets target = FirstRunIndexer::Target::None;
//如果数据库被执行过重建,那么跳过增量更新步骤。
if(m_config->isFileIndexEnable() && !(rebuiltTarget & FirstRunIndexer::Target::Basic)) {
target |= FirstRunIndexer::Target::Basic;
m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Updating);
}
if(m_config->isContentIndexEnable() && !(rebuiltTarget & FirstRunIndexer::Target::Content)) {
target |= FirstRunIndexer::Target::Content;
m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Updating);
}
startIndexJob(mode, target);
//启动监听
m_fileWatcher.installWatches();
}
/**
 * @brief Returns the scheduler state currently stored in m_state.
 */
IndexScheduler::IndexerState IndexScheduler::getIndexState()
{
return m_state;
}
/**
 * @brief Queues a rebuild for every enabled database whose recorded state is
 *        IndexStatusRecorder::State::Error.
 * @return The set of databases a rebuild was queued for (None if there was
 *         nothing to rebuild).
 *
 * The recorded state of each affected database is reset to Initializing.
 */
FirstRunIndexer::Targets IndexScheduler::checkAndRebuild()
{
    FirstRunIndexer::WorkModes mode = FirstRunIndexer::WorkMode::Rebuild;
    FirstRunIndexer::Targets target = FirstRunIndexer::Target::None;
    if(m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error && m_config->isFileIndexEnable()) {
        qDebug() << "Basic database error,need rebuild";
        target |= FirstRunIndexer::Target::Basic;
        m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Initializing);
    }
    // The content branch must look at the content switch and reset the
    // content state; it used to test isFileIndexEnable() and overwrite
    // INDEX_DATABASE_STATE, leaving the content state stuck at Error.
    if(m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Error && m_config->isContentIndexEnable()) {
        qDebug() << "Content database error,need rebuild";
        target |= FirstRunIndexer::Target::Content;
        m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Initializing);
    }
    startIndexJob(mode, target);
    return target;
}
/**
 * @brief Creates a FirstRunIndexer for the configured directories and queues
 *        it on the thread pool.
 * @param mode   Work mode handed to the indexer.
 * @param target Databases to work on; nothing is started for Target::None.
 *
 * When the indexer reports a database as done, its recorded state is set to
 * Ready unless it is Error, and the matching *IndexDone signal is re-emitted
 * with the success flag.
 */
void IndexScheduler::startIndexJob(FirstRunIndexer::WorkModes &mode, FirstRunIndexer::Targets &target)
{
    if(FirstRunIndexer::Target::None == target) {
        return;
    }

    FirstRunIndexer *indexer = new FirstRunIndexer(m_config->currentIndexableDir(), m_config->currentBlackListOfIndex(), m_stop, mode, target);
    connect(indexer, &FirstRunIndexer::done, this, &IndexScheduler::firstRunFinished, Qt::QueuedConnection);
    connect(indexer, &FirstRunIndexer::progress, this, &IndexScheduler::process);

    connect(indexer, &FirstRunIndexer::basicIndexDone, this, [this](uint size){
        const bool success = m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() != IndexStatusRecorder::State::Error;
        if(success) {
            m_statusRecorder->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Ready);
        }
        Q_EMIT basicIndexDone(size, success);
    });
    connect(indexer, &FirstRunIndexer::contentIndexDone, this, [this](uint size){
        const bool success = m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() != IndexStatusRecorder::State::Error;
        if(success) {
            m_statusRecorder->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Ready);
        }
        Q_EMIT contentIndexDone(size, success);
    });

    m_threadPool.start(indexer);
}
/**
 * @brief Reacts to the file index being switched on or off.
 * @param enable true starts indexing, false stops the scheduler.
 */
void IndexScheduler::fileIndexEnable(bool enable)
{
    if(!enable) {
        stop();
        return;
    }
    scheduleIndexing();
}
void IndexScheduler::updateIndex(const QVector<PendingFile> &files)
{
qDebug() << "updateIndex=====";
m_isUpdateFinished = false;
m_state = Running;
IndexUpdater *updateJob = new IndexUpdater(files, m_stop);
connect(updateJob, &IndexUpdater::done, this, &IndexScheduler::updateFinished, Qt::QueuedConnection);
m_threadPool.start(updateJob);
}
void IndexScheduler::firstRunFinished()
{
if((m_statusRecorder->getStatus(INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Ready)
&& m_statusRecorder->getStatus(CONTENT_INDEX_DATABASE_STATE).toInt() == IndexStatusRecorder::State::Ready) {
m_isFirstRunFinished = true;
}
if(m_isFirstRunFinished && m_isAddNewPathFinished && m_isUpdateFinished) {
m_state = Idle;
Q_EMIT stateChange(m_state);
}
}
/**
 * @brief Handles the end of an update job; goes Idle when neither a first
 *        run nor an add-path job is outstanding.
 */
void IndexScheduler::updateFinished()
{
    m_isUpdateFinished = true;
    const bool othersDone = m_isFirstRunFinished && m_isAddNewPathFinished;
    if(!othersDone) {
        return;
    }
    m_state = Idle;
    Q_EMIT stateChange(m_state);
}
/**
 * @brief Handles the end of an add-path job; goes Idle when neither a first
 *        run nor an update job is outstanding.
 */
void IndexScheduler::addNewPathFinished()
{
    m_isAddNewPathFinished = true;
    const bool othersDone = m_isFirstRunFinished && m_isUpdateFinished;
    if(!othersDone) {
        return;
    }
    m_state = Idle;
    Q_EMIT stateChange(m_state);
}

View File

@ -0,0 +1,95 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef INDEXSCHEDULER_H
#define INDEXSCHEDULER_H
#include <QObject>
#include <QThreadPool>
#include <QAtomicInt>
#include "file-watcher.h"
#include "index-status-recorder.h"
#include "common.h"
#include "first-run-indexer.h"
namespace UkuiSearch {
/**
 * @brief Schedules index jobs (first run / rebuild, added directories and
 *        incremental updates) on an internal thread pool and reports the
 *        overall indexer state.
 */
class IndexScheduler : public QObject
{
Q_OBJECT
public:
/**
 * @brief Overall state reported through stateChange() and getIndexState().
 */
enum IndexerState {
Startup,
Running,
Idle,
Stop
};
Q_ENUM(IndexerState)
explicit IndexScheduler(QObject *parent = nullptr);
/**
* @brief addNewPath Queue an indexing job for a newly added directory.
* @param folders Directory to index.
* @param blackList Paths passed to the indexer as its black list.
*/
Q_SCRIPTABLE void addNewPath(const QString &folders, const QStringList& blackList = QStringList());
/**
* @brief removeIndex Stop watching a directory.
* @param folders Directory whose watch is removed.
*/
Q_SCRIPTABLE void removeIndex(const QString& folders);
// Raise the stop flag, drop watches and queued jobs, switch to Stop.
Q_SCRIPTABLE void stop();
// Start a full pass: rebuild broken databases, update the others.
Q_SCRIPTABLE void scheduleIndexing();
Q_SCRIPTABLE IndexerState getIndexState();
Q_SIGNALS:
void stateChange(IndexerState);
void process(IndexType type, uint all, uint finished);
void basicIndexDone(uint size, bool success);
void contentIndexDone(uint size, bool success);
void done();
private Q_SLOTS:
void fileIndexEnable(bool enable);
void updateIndex(const QVector<PendingFile>& files);
void firstRunFinished();
void updateFinished();
void addNewPathFinished();
private:
/**
* @brief checkAndRebuild
* Queues a rebuild for databases recorded as IndexStatusRecorder::State::Error.
* @return The databases a rebuild was queued for.
*/
FirstRunIndexer::Targets checkAndRebuild();
void startIndexJob(FirstRunIndexer::WorkModes &mode, FirstRunIndexer::Targets &target);
FileWatcher m_fileWatcher;
IndexStatusRecorder *m_statusRecorder = nullptr;
FileIndexerConfig *m_config = nullptr;
IndexerState m_state;
// Non-zero while the scheduler is stopped; a reference is handed to the jobs.
QAtomicInt m_stop;
QThreadPool m_threadPool;
// Completion flags for the three kinds of jobs; Idle requires all three.
bool m_isFirstRunFinished = true;
bool m_isUpdateFinished = true;
bool m_isAddNewPathFinished = true;
};
}
#endif // INDEXSCHEDULER_H

View File

@ -1,3 +1,22 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "index-status-recorder.h"
#include <mutex>
@ -12,10 +31,15 @@ IndexStatusRecorder *IndexStatusRecorder::getInstance()
return m_instance;
}
void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value)
/**
 * @brief Opens the status file INDEX_STATUS as an INI-format QSettings
 *        object parented to this recorder.
 */
IndexStatusRecorder::IndexStatusRecorder(QObject *parent) : QObject(parent)
{
m_status = new QSettings(INDEX_STATUS, QSettings::IniFormat, this);
}
void IndexStatusRecorder::setStatus(const QString &key, State state)
{
m_mutex.lock();
m_status->setValue(key, value);
m_status->setValue(key, state);
m_status->sync();
m_mutex.unlock();
}
@ -31,7 +55,11 @@ bool IndexStatusRecorder::indexDatabaseEnable()
m_mutex.lock();
m_status->sync();
m_mutex.unlock();
return m_status->value(INDEX_DATABASE_STATE, QVariant(false)).toBool();
if(m_status->value(INDEX_DATABASE_STATE, 0).toInt() == State::Ready) {
return true;
} else {
return false;
}
}
@ -40,20 +68,9 @@ bool IndexStatusRecorder::contentIndexDatabaseEnable()
m_mutex.lock();
m_status->sync();
m_mutex.unlock();
return m_status->value(CONTENT_INDEX_DATABASE_STATE, QVariant(false)).toBool();
}
bool IndexStatusRecorder::ocrDatabaseEnable()
{
m_mutex.lock();
m_status->sync();
m_mutex.unlock();
return m_status->value(OCR_DATABASE_STATE, QVariant(false)).toBool();
}
IndexStatusRecorder::IndexStatusRecorder(QObject *parent) : QObject(parent)
{
m_status = new QSettings(INDEX_STATUS, QSettings::IniFormat, this);
if(m_status->value(CONTENT_INDEX_DATABASE_STATE, 0).toInt() == State::Ready) {
return true;
} else {
return false;
}
}

View File

@ -1,3 +1,22 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef INDEXSTATUSRECORDER_H
#define INDEXSTATUSRECORDER_H
@ -7,9 +26,6 @@
#include <QMutex>
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
#define INDEX_DATABASE_STATE "index_database_state"
#define OCR_DATABASE_STATE "ocr_database_state"
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit" // 1 - 出错2 - 正常3-关闭索引; 0-有信号正在处理
#define PENDING_FILE_QUEUE_FINISH "pending_file_queue_finish"
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
namespace UkuiSearch {
//fixme: we need a better way to record index status.
@ -17,17 +33,24 @@ class IndexStatusRecorder : public QObject
{
Q_OBJECT
public:
enum State{
Initializing = 0,
Error = 1,
Ready = 2,
Updating = 3
};
Q_ENUM(State)
static IndexStatusRecorder *getInstance();
void setStatus(const QString& key, const QVariant& value);
void setStatus(const QString& key, UkuiSearch::IndexStatusRecorder::State state);
const QVariant getStatus(const QString& key);
bool indexDatabaseEnable();
bool contentIndexDatabaseEnable();
bool ocrDatabaseEnable();
private:
explicit IndexStatusRecorder(QObject *parent = nullptr);
static IndexStatusRecorder *m_instance;
QSettings *m_status;
QSettings *m_status = nullptr;
QMutex m_mutex;
};
}

View File

@ -0,0 +1,126 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "index-updater.h"
#include <malloc.h>
#include "writable-database.h"
#include "basic-indexer.h"
#include "file-indexer-config.h"
#include "file-content-indexer.h"
#include "common.h"
#include "file-utils.h"
using namespace UkuiSearch;
/**
 * @brief Creates an updater for one batch of pending files.
 * @param files Batch to process; stored as a copy in m_cache.
 * @param stop  Stop flag; its address is kept in m_stop and read by UpdateIndex().
 */
IndexUpdater::IndexUpdater(const QVector<PendingFile>& files, QAtomicInt &stop)
    : m_cache(files), m_stop(&stop)
{
    // Nothing else to do: all work happens in run()/UpdateIndex().
}
void IndexUpdater::UpdateIndex()
{
if(FileIndexerConfig::getInstance()->isFileIndexEnable()) {
WritableDatabase basicDb(DataBaseType::Basic);
if(!basicDb.open()) {
qWarning() << "Basic db open failed, fail to update index";
return;
}
qDebug() << "===update basic index===";
for(PendingFile file : m_cache) {
if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path();
basicDb.removeDocument(file.path());
} else {
qDebug() << "| index:" <<file.path();
BasicIndexer indexer(file.path());
if(indexer.index()) {
basicDb.addDocument(indexer.document());
}
}
}
basicDb.commit();
qDebug() << "===finish update basic index===";
}
if(FileIndexerConfig::getInstance()->isContentIndexEnable()) {
if(m_stop->load()) {
qDebug() << "Index stopped, abort update content index.";
return;
}
WritableDatabase contentDb(DataBaseType::Content);
if(!contentDb.open()) {
qWarning() << "Content db open failed, fail to update index";
return;
}
QMap<QString, bool> suffixMap = targetFileTypeMap;
//ocr
if(FileIndexerConfig::getInstance()->isOCREnable()) {
suffixMap.unite(targetPhotographTypeMap);
}
qDebug() << "===update content index===";
int size = 0;
for(PendingFile file : m_cache) {
QString suffix = QFileInfo(file.path()).suffix();
if(file.shouldRemoveIndex()) {
qDebug() << "| remove:" <<file.path();
if(file.isDir()) {
contentDb.removeDocument(file.path());
} else if(true == suffixMap[suffix]) {
contentDb.removeDocument(file.path());
}
} else if(true == suffixMap[suffix] && !file.isDir()) {
if(FileUtils::isEncrypedOrUnsupport(file.path(), suffix)) {
contentDb.removeDocument(file.path());
continue;
}
qDebug() << "| index:" <<file.path();
fileContentIndexer indexer(file.path());
if(indexer.index()) {
contentDb.addDocument(indexer.document());
++size;
} else if(file.isModified()){
contentDb.removeDocument(file.path());
}
}
if(size >= 30) {
contentDb.commit();
qDebug() << "30 finished.";
size = 0;
}
if(m_stop->load()) {
qDebug() << "Index stopped, content index update interrupted";
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
return;
}
}
contentDb.commit();
qDebug() << "===finish update content index===";
}
m_cache.clear();
m_cache.shrink_to_fit();
malloc_trim(0);
Q_EMIT done();
}
void IndexUpdater::run()
{
UpdateIndex();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -14,33 +14,34 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef TRAVERSE_BFS_H
#define TRAVERSE_BFS_H
#ifndef INDEXUPDATER_H
#define INDEXUPDATER_H
#include <QDebug>
#include <QDir>
#include <QQueue>
#include <QRunnable>
#include <QObject>
#include <QVector>
#include <QAtomicInt>
#include "pending-file.h"
namespace UkuiSearch {
class TraverseBFS {
class IndexUpdater : public QObject, public QRunnable
{
Q_OBJECT
public:
TraverseBFS() = default;
void Traverse();
virtual ~TraverseBFS() = default;
virtual void work(const QFileInfo&) = 0;
void setPath(const QStringList&);
void setBlockPath(const QStringList &pathList);
protected:
TraverseBFS(const QStringList&);
QStringList m_pathList;
QStringList m_blockList;
explicit IndexUpdater(const QVector<PendingFile>& files, QAtomicInt& stop);
void run() override;
Q_SIGNALS:
void done();
private:
TraverseBFS(const TraverseBFS&) = delete;
void operator=(const TraverseBFS&) = delete;
void UpdateIndex();
QVector<PendingFile> m_cache;
QAtomicInt *m_stop = nullptr;
};
}
#endif // TRAVERSE_BFS_H
#endif // INDEXUPDATER_H

View File

@ -1,36 +1,45 @@
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/construct-document.h \
$$PWD/basic-indexer.h \
$$PWD/database.h \
$$PWD/document.h \
# $$PWD/file-iterator.h \
$$PWD/file-content-indexer.h \
$$PWD/file-reader.h \
$$PWD/file-search-plugin.h \
$$PWD/first-index.h \
$$PWD/index-generator.h \
$$PWD/first-run-indexer.h \
$$PWD/index-scheduler.h \
$$PWD/index-status-recorder.h \
$$PWD/inotify-watch.h \
$$PWD/monitor.h \
$$PWD/ocrobject.h \
$$PWD/pending-file-queue.h \
$$PWD/pending-file.h \
$$PWD/search-manager.h \
$$PWD/file-index-manager.h \
$$PWD/traverse-bfs.h \
$$PWD/ukui-search-qdbus.h
$$PWD/ukui-search-qdbus.h \
$$PWD/file-indexer-config.h \
$$PWD/file-watcher.h \
$$PWD/index-updater.h \
$$PWD/writable-database.h
SOURCES += \
$$PWD/construct-document.cpp \
$$PWD/basic-indexer.cpp \
$$PWD/database.cpp \
$$PWD/document.cpp \
$$PWD/file-index-manager.cpp \
$$PWD/file-content-indexer.cpp \
$$PWD/file-reader.cpp \
$$PWD/file-search-plugin.cpp \
$$PWD/first-index.cpp \
$$PWD/index-generator.cpp \
$$PWD/first-run-indexer.cpp \
$$PWD/index-scheduler.cpp \
$$PWD/index-status-recorder.cpp \
$$PWD/inotify-watch.cpp \
$$PWD/monitor.cpp \
$$PWD/ocrobject.cpp \
$$PWD/pending-file-queue.cpp \
$$PWD/pending-file.cpp \
$$PWD/search-manager.cpp \
$$PWD/traverse-bfs.cpp \
$$PWD/ukui-search-qdbus.cpp
$$PWD/ukui-search-qdbus.cpp \
$$PWD/file-indexer-config.cpp \
$$PWD/file-watcher.cpp \
$$PWD/index-updater.cpp \
$$PWD/writable-database.cpp

View File

@ -1,512 +0,0 @@
#include "inotify-watch.h"
#include <QMutexLocker>
#include <sys/ioctl.h>
#include <malloc.h>
#include <errno.h>
#include "dir-watcher.h"
using namespace UkuiSearch;
static InotifyWatch* global_instance_InotifyWatch = nullptr;
/**
 * @brief Returns the process-wide InotifyWatch, creating it on first use.
 *
 * The instance is stored in global_instance_InotifyWatch; no locking is
 * performed around the creation.
 */
UkuiSearch::InotifyWatch *UkuiSearch::InotifyWatch::getInstance()
{
    if(global_instance_InotifyWatch == nullptr)
        global_instance_InotifyWatch = new InotifyWatch();
    return global_instance_InotifyWatch;
}
/**
 * @brief Opens the index semaphore, asks the system D-Bus helper to set the
 *        inotify max_user_watches value and creates the shared-memory object
 *        keyed "ukui-search-shared-map" (parented to this object).
 */
UkuiSearch::InotifyWatch::InotifyWatch()
    : TraverseBFS(),
      m_semaphore(INDEX_SEM, 0, QSystemSemaphore::AccessMode::Open)
{
    qDebug() << "setInotifyMaxUserWatches start";
    UkuiSearchQDBus dbusHelper;
    dbusHelper.setInotifyMaxUserWatches();
    qDebug() << "setInotifyMaxUserWatches end";

    m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this);
}
/**
 * @brief Destroys the socket notifier, if one was ever created.
 */
InotifyWatch::~InotifyWatch()
{
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete m_notifier;
    m_notifier = nullptr;
}
/**
 * @brief Registers an inotify watch for @p path and records it in m_pathMap.
 *
 * The watch listens for move (from/to), create, delete and modify events.
 *
 * @param path Path to watch.
 * @return true when the kernel accepted the watch, false otherwise (a warning
 *         is logged and m_pathMap is left unchanged).
 */
bool InotifyWatch::addWatch(const QString &path)
{
    const auto nativePath = path.toStdString();
    const int wd = inotify_add_watch(m_inotifyFd,
                                     nativePath.c_str(),
                                     (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY));
    if(wd != -1) {
        m_pathMap[wd] = path;
        return true;
    }
    qWarning() << "AddWatch error:" << path;
    return false;
}
bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
{
inotify_rm_watch(m_inotifyFd, m_pathMap.key(path));
if(removeFromDatabase) {
for(QMap<int, QString>::Iterator i = m_pathMap.begin(); i != m_pathMap.end();) {
// qDebug() << i.value();
// if(i.value().length() > path.length()) {
if(FileUtils::isOrUnder(i.value(), path)) {
qDebug() << "remove path: " << i.value();
inotify_rm_watch(m_inotifyFd, m_pathMap.key(path));
PendingFile f(i.value());
f.setDeleted();
f.setIsDir();
PendingFileQueue::getInstance()->enqueue(f);
m_pathMap.erase(i++);
} else {
i++;
}
}
} else {
for(QMap<int, QString>::Iterator i = m_pathMap.begin(); i != m_pathMap.end();) {
// qDebug() << i.value();
if(i.value().length() > path.length()) {
if(FileUtils::isOrUnder(i.value(), path)) {
// if(i.value().startsWith(path + "/")) {
// qDebug() << "remove path: " << i.value();
inotify_rm_watch(m_inotifyFd, m_pathMap.key(path));
m_pathMap.erase(i++);
} else {
i++;
}
} else {
i++;
}
}
}
m_pathMap.remove(m_pathMap.key(path));
return true;
}
void InotifyWatch::work(const QFileInfo &info)
{
qDebug() << info.fileName() << "-------" << info.absoluteFilePath();
if(info.isDir() && (!info.isSymLink())) {
this->addWatch(info.absoluteFilePath());
}
PendingFile f(info.absoluteFilePath());
if(info.isDir()) {
f.setIsDir();
}
PendingFileQueue::getInstance()->enqueue(f);
}
/**
 * @brief Adds inotify watches for the given roots and all directories below
 *        them (breadth first), skipping blocked paths.
 *
 * Runs with m_pathMapLock held. Files are not queued here; only watches are
 * installed.
 *
 * Fix: the original removed items from pathList while a range-for loop was
 * iterating that same list, which invalidates the loop's iterators. The
 * allowed roots are now collected into a separate list.
 *
 * @param pathList  Roots to traverse; m_pathList is used when empty.
 * @param blockList Paths to skip; m_blockList is used when empty.
 */
void InotifyWatch::firstTraverse(QStringList pathList, QStringList blockList)
{
    QMutexLocker locker(&m_pathMapLock);
    if(pathList.isEmpty()) {
        pathList = m_pathList;
    }
    if(blockList.isEmpty()) {
        blockList = m_blockList;
    }

    // Keep only the roots that are not equal to / under any blocked path.
    const QStringList &candidates = pathList;
    const QStringList &blocked = blockList;
    QQueue<QString> bfs;
    for(const QString &root : candidates) {
        bool underBlocked = false;
        for(const QString &blockPath : blocked) {
            if(FileUtils::isOrUnder(root, blockPath)) {
                underBlocked = true;
                break;
            }
        }
        if(underBlocked) {
            continue;
        }
        addWatch(root);
        bfs.enqueue(root);
    }

    // Each blocked path is consumed the first time it is met during the walk.
    QStringList remainingBlocks = blockList;
    QDir dir;
    dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
    dir.setSorting(QDir::DirsFirst);
    while(!bfs.empty()) {
        dir.setPath(bfs.dequeue());
        const QFileInfoList entries = dir.entryInfoList();
        for(const QFileInfo &entry : entries) {
            const QString entryPath = entry.absoluteFilePath();
            // removeOne() returns true when the path was in the block list.
            if(remainingBlocks.removeOne(entryPath)) {
                continue;
            }
            if(entry.isDir() && !entry.isSymLink()) {
                addWatch(entryPath);
                bfs.enqueue(entryPath);
            }
        }
    }
}
void InotifyWatch::addIndexPath(const QString path, const QStringList blockList)
{
this->firstTraverse(QStringList() << path, blockList);
}
void InotifyWatch::removeIndexPath(const QString &path, bool fileIndexEnable)
{
QMutexLocker locker(&m_pathMapLock);
if(fileIndexEnable) {
removeWatch(path, true);
}else {
for(QMap<int, QString>::Iterator i = m_pathMap.begin(); i != m_pathMap.end();) {
if(FileUtils::isOrUnder(i.value(), path)) {
qDebug() << "remove path: " << i.value();
PendingFile f(i.value());
f.setDeleted();
f.setIsDir();
PendingFileQueue::getInstance()->enqueue(f);
m_pathMap.erase(i++);
} else {
i++;
}
}
}
PendingFileQueue::getInstance()->forceFinish();
PendingFileQueue::getInstance()->~PendingFileQueue();
}
// Currently a no-op: every statement in the body is commented out.
// The disabled code would have quit the thread, deleted m_notifier, removed
// the watch on the home directory and recorded INOTIFY_NORMAL_EXIT = "3".
void InotifyWatch::stopWatch()
{
// if(this->isRunning()) {
// this->quit();
// if(m_notifier)
// delete m_notifier;
// m_notifier = nullptr;
// removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
// }
// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
}
/**
 * @brief Thread body: initialises inotify, installs the initial watches and
 *        then blocks in select() while index search is the active method.
 *
 * For every wake-up the pending events are read into a heap buffer. When at
 * least one event name does not start with '.', the buffer is handed to
 * slotEvent(); otherwise it is discarded. FileUtils::indexStatus is
 * incremented while a wake-up is being handled.
 *
 * When the loop ends and the search method is DIRECTSEARCH, all watches are
 * removed and INOTIFY_NORMAL_EXIT is set to "3". The inotify descriptor is
 * closed on the way out.
 *
 * Fix: ioctl() reports failure by returning -1 (with errno set); the original
 * compared its return value with EINVAL, so the error branch could never be
 * taken and `avail` was then used uninitialised.
 */
void InotifyWatch::run()
{
    m_inotifyFd = inotify_init();
    if (m_inotifyFd > 0) {
        qDebug()<<"Inotify init success!";
    } else {
        qWarning() << "Inotify init fail! Now try add inotify_user_instances.";
        // Ask the D-Bus helper for more inotify instances and retry once.
        UkuiSearchQDBus usQDBus;
        usQDBus.addInotifyUserInstances(128);
        m_inotifyFd = inotify_init();
        if (m_inotifyFd > 0) {
            qDebug()<<"Inotify init success!";
        } else {
            printf("errno=%d\n",errno);
            printf("Mesg:%s\n",strerror(errno));
            Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
        }
    }

    setPath(DirWatcher::getDirWatcher()->currentIndexableDir());
    setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex());
    firstTraverse();

    while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(m_inotifyFd, &fds);
        int rc;
        // No timeout: block until the inotify descriptor becomes readable.
        rc = select(m_inotifyFd + 1, &fds, NULL, NULL, NULL);
        if(rc > 0) {
            ++FileUtils::indexStatus;
            int avail = 0;
            if (ioctl(m_inotifyFd, FIONREAD, &avail) < 0) {
                qWarning() << "Did not receive an entire inotify event.";
                --FileUtils::indexStatus;
                return;
            }
            char* buf = (char*)malloc(avail);
            memset(buf, 0x00, avail);
            const ssize_t len = read(m_inotifyFd, buf, avail);
            if(len != avail) {
                qWarning()<<"read event error";
                // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
            }
            // Look for the first event whose name does not start with '.'.
            int i = 0;
            while (i < len) {
                const struct inotify_event* event = (struct inotify_event*)&buf[i];
                if(event->name[0] != '.') {
                    // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
                    // qDebug("mask:0x%x,",event->mask);
                    break;
                }
                i += sizeof(struct inotify_event) + event->len;
            }
            if(i < len ) {
                qDebug() << "fork";
                slotEvent(buf, len);
            }
            free(buf);
            --FileUtils::indexStatus;
        } else if(rc < 0) {
            // error
            qWarning() << "select result < 0, error!";
            IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
            assert(false);
        }
    }

    qDebug() << "Leave watch loop";
    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
        for(QString path : m_pathMap) {
            inotify_rm_watch(m_inotifyFd, m_pathMap.key(path));
        }
        m_pathMap.clear();
    }
    close(m_inotifyFd);
    // fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
    // m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
    // connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);
    // exec();
}
// Handles one batch of inotify events in a forked child process.
//
// buf/len: raw inotify event data already read by the caller.
//
// Child (pid == 0): processes the batch, then keeps serving further events
// from m_inotifyFd until select() times out (timeout struct initialised to
// 40 s). On timeout it serialises m_pathMap into the shared-memory segment,
// flushes the pending-file queue and exits with _exit(0).
// Parent (pid > 0): waits for the child, then reads the map back from shared
// memory and replaces m_pathMap under m_pathMapLock.
// Nothing happens unless the current search method is INDEXSEARCH.
void InotifyWatch::slotEvent(char *buf, ssize_t len)
{
// eventProcess(socket);
if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
pid_t pid;
pid = fork();
if(pid == 0) {
// Child: ask for SIGTERM when the parent dies and rename the process.
prctl(PR_SET_PDEATHSIG, SIGTERM);
prctl(PR_SET_NAME, "inotify-index");
this->eventProcess(buf, len);
fd_set read_fds;
int rc;
// NOTE(review): the timeout struct is initialised once and reused for every
// select() call; whether the remaining time carries over between iterations
// depends on the platform's select() behaviour - confirm this is intended.
timeval* read_timeout = (timeval*)malloc(sizeof(timeval));
read_timeout->tv_sec = 40;
read_timeout->tv_usec = 0;
for(;;) {
FD_ZERO(&read_fds);
FD_SET(m_inotifyFd, &read_fds);
rc = select(m_inotifyFd + 1, &read_fds, NULL, NULL, read_timeout);
if(rc < 0) {
// error
qWarning() << "fork select result < 0, error!";
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
assert(false);
} else if(rc == 0) {
// Timeout: hand the (possibly changed) watch map to the parent and exit.
qDebug() << "select timeout!";
::free(read_timeout);
QBuffer buffer;
QDataStream out(&buffer);
if (m_sharedMemory->isAttached()) {
m_sharedMemory->detach();
}
buffer.open(QBuffer::ReadWrite);
out << m_pathMap;
int size = buffer.size();
if (!m_sharedMemory->create(size)) {
qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString();
} else {
m_sharedMemory->lock();
char *to = static_cast<char *>(m_sharedMemory->data());
const char *from = buffer.data().constData();
// Never copy more than the segment can hold.
memcpy(to, from, qMin(size, m_sharedMemory->size()));
m_sharedMemory->unlock();
}
// GlobalSettings::getInstance()->forceSync();
// Flush queued files and run the queue destructor before leaving the child.
PendingFileQueue::getInstance()->forceFinish();
PendingFileQueue::getInstance()->~PendingFileQueue();
::_exit(0);
} else {
// More events arrived: read and process them directly from the descriptor.
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
this->eventProcess(m_inotifyFd);
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
}
}
} else if(pid > 0) {
// Parent: block until the child has finished, then import its watch map.
waitpid(pid, NULL, 0);
if (!m_sharedMemory->attach()) {
qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString();
} else {
QBuffer buffer;
QDataStream in(&buffer);
QMap<int, QString> pathMap;
m_sharedMemory->lock();
buffer.setData(static_cast<const char *>(m_sharedMemory->constData()), m_sharedMemory->size());
buffer.open(QBuffer::ReadWrite);
in >> pathMap;
m_sharedMemory->unlock();
m_sharedMemory->detach();
m_pathMapLock.lock();
m_pathMap = pathMap;
m_pathMapLock.unlock();
}
} else {
// fork() failed.
assert(false);
}
}
}
/**
 * @brief Reads the pending inotify events and returns them only when at least
 *        one event concerns a name that does not start with '.'.
 *
 * Fixes:
 *  - The original loop advanced its offset only inside the branch that
 *    returned, so any event whose name did not start with '.' made it spin
 *    forever. The offset now advances on every iteration.
 *  - ioctl() signals failure with -1; comparing its return value with EINVAL
 *    never matched.
 *  - The selection condition now matches run() and eventProcess(int), which
 *    both treat names not starting with '.' as the events of interest.
 *    NOTE(review): the original returned the buffer on a '.'-prefixed name;
 *    confirm the aligned condition is the intended one.
 *
 * @return A malloc()-allocated buffer holding the raw events (the caller must
 *         free() it), or NULL when there is nothing of interest or the
 *         available byte count could not be queried.
 */
char * InotifyWatch::filter()
{
    int avail = 0;
    if (ioctl(m_inotifyFd, FIONREAD, &avail) < 0) {
        qWarning() << "Did not receive an entire inotify event.";
        return NULL;
    }
    char* buffer = (char*)malloc(avail);
    memset(buffer, 0x00, avail);
    const int len = read(m_inotifyFd, buffer, avail);
    if(len != avail) {
        qWarning()<<"read event error";
        // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
    }

    int i = 0;
    while (i < len) {
        const struct inotify_event* event = (struct inotify_event*)&buffer[i];
        if(event->name[0] != '.') {
            // Found an event worth processing: hand the whole batch over.
            return buffer;
        }
        i += sizeof(struct inotify_event) + event->len;
    }

    free(buffer);
    return NULL;
}
/**
 * @brief Reads all pending inotify events from @p socket and processes them
 *        when at least one event name does not start with '.'.
 *
 * Fix: ioctl() reports failure by returning -1; the original compared the
 * return value with EINVAL, so a failed call went unnoticed and `avail` was
 * used uninitialised.
 *
 * @param socket inotify file descriptor to read from.
 */
void InotifyWatch::eventProcess(int socket)
{
    // qDebug()<< "Enter eventProcess!";
    int avail = 0;
    if (ioctl(socket, FIONREAD, &avail) < 0) {
        qWarning() << "Did not receive an entire inotify event.";
        return;
    }
    char* buffer = (char*)malloc(avail);
    memset(buffer, 0x00, avail);
    const ssize_t len = read(socket, buffer, avail);
    if(len != avail) {
        qWarning()<<"read event error";
        // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
    }

    // Skip leading events whose names start with '.'; stop at the first other one.
    int i = 0;
    while (i < len) {
        const struct inotify_event* event = (struct inotify_event*)&buffer[i];
        if(event->name[0] != '.') {
            break;
        }
        i += sizeof(struct inotify_event) + event->len;
    }
    if(i >= len) {
        qDebug() << "There is nothing to do!";
        free(buffer);
        return;
    }

    // The whole batch is handed over; per-event filtering happens there.
    eventProcess(buffer, len);
    free(buffer);
}
// Walks a raw buffer of inotify events and turns each one into index work.
//
// buffer/len: consecutive inotify_event records as read from the descriptor.
//
// Events whose name starts with '.' and events located under a path in
// m_blockList are skipped. For the rest:
//   IN_CREATE                - queue the path; new non-symlink directories are
//                              also watched and traversed.
//   IN_DELETE / IN_MOVED_FROM - directories: removeWatch(); files: queue a
//                              deleted PendingFile.
//   IN_MODIFY                - queue the path (files only).
//   IN_MOVED_TO              - directories: drop old watches, queue, re-watch
//                              and traverse; files: queue a delete followed by
//                              a fresh entry.
void InotifyWatch::eventProcess(const char *buffer, ssize_t len)
{
// qDebug()<< "Begin eventProcess! len:" << len;
char * p = const_cast<char*>(buffer);
while (p < buffer + len) {
const struct inotify_event* event = reinterpret_cast<inotify_event *>(p);
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
// qDebug("mask:0x%x,",event->mask);
if(event->name[0] != '.') {
// Full path = watched directory recorded for this descriptor + entry name.
QString path = m_pathMap[event->wd] + '/' + event->name;
// Filter out events under blacklisted paths.
for(QString i : m_blockList) {
if(FileUtils::isOrUnder(path, i))
goto next;
}
//Create top dir first, traverse it last.
if(event->mask & IN_CREATE) {
// qDebug() << "IN_CREATE";
PendingFile f(path);
if(event->mask & IN_ISDIR) {
f.setIsDir();
}
PendingFileQueue::getInstance()->enqueue(f);
if(event->mask & IN_ISDIR) {
if(!QFileInfo(path).isSymLink()){
addWatch(path);
setPath(QStringList() << path);
Traverse();
}
}
goto next;
}
if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) {
qDebug() << "IN_DELETE or IN_MOVED_FROM";
if(event->mask & IN_ISDIR) {
removeWatch(path);
} else {
PendingFile f(path);
f.setDeleted();
PendingFileQueue::getInstance()->enqueue(f);
}
// Advances to the next record here instead of jumping to the `next` label.
p += sizeof(struct inotify_event) + event->len;
continue;
}
if(event->mask & IN_MODIFY) {
// qDebug() << "IN_MODIFY";
if(!(event->mask & IN_ISDIR)) {
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
}
goto next;
}
if(event->mask & IN_MOVED_TO) {
qDebug() << "IN_MOVED_TO";
if(event->mask & IN_ISDIR) {
removeWatch(path);
PendingFile f(path);
f.setIsDir();
PendingFileQueue::getInstance()->enqueue(f);
if(!QFileInfo(path).isSymLink()){
addWatch(path);
setPath(QStringList() << path);
Traverse();
}
} else {
//Enqueue a deleted file to merge.
PendingFile f(path);
f.setDeleted();
PendingFileQueue::getInstance()->enqueue(f);
//Enqueue a new one.
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
}
goto next;
}
}
// Common exit of one iteration: step over the fixed header plus the name.
next:
p += sizeof(struct inotify_event) + event->len;
}
// qDebug()<< "Finish eventProcess!";
}

View File

@ -1,57 +0,0 @@
#ifndef INOTIFYWATCH_H
#define INOTIFYWATCH_H
#include <QThread>
#include <QBuffer>
#include <QSocketNotifier>
#include <QDataStream>
#include <QSharedMemory>
#include <QSystemSemaphore>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/inotify.h>
#include <unistd.h>
#include "traverse-bfs.h"
#include "ukui-search-qdbus.h"
#include "index-status-recorder.h"
#include "file-utils.h"
#include "pending-file-queue.h"
#include "common.h"
namespace UkuiSearch {
// Thread that owns an inotify descriptor, keeps a map from watch descriptor
// to directory path and turns file-system events into PendingFile entries.
// Accessed as a singleton through getInstance().
class InotifyWatch : public QThread, public TraverseBFS
{
Q_OBJECT
public:
// Returns the single instance, creating it on first call.
static InotifyWatch* getInstance();
// Adds a watch for path and records it in m_pathMap; false on failure.
bool addWatch(const QString &path);
// Removes the watch for path and for recorded paths beneath it.
// removeFromDatabase: also queue "deleted" entries for the removed paths.
bool removeWatch(const QString &path, bool removeFromDatabase = true);
// TraverseBFS callback invoked for each entry found during a traversal.
virtual void work(const QFileInfo &info) final;
// Installs watches for pathList (minus blockList); empty lists fall back to
// the lists stored in the TraverseBFS base.
void firstTraverse(QStringList pathList = {}, QStringList blockList = {});
void stopWatch();
void addIndexPath(const QString path, const QStringList blockList);
void removeIndexPath(const QString &path, bool fileIndexEnable);
protected:
// Thread entry point.
void run() override;
private Q_SLOTS:
// Processes one batch of raw inotify data (buf, len bytes).
void slotEvent(char *buf, ssize_t len);
private:
explicit InotifyWatch();
~InotifyWatch();
char * filter();
// Reads pending events from the given descriptor and processes them.
void eventProcess(int socket);
// Processes an already-read buffer of inotify events.
void eventProcess(const char *buffer, ssize_t len);
int m_inotifyFd;
QSocketNotifier* m_notifier = nullptr;
QSharedMemory *m_sharedMemory = nullptr;
// watch descriptor -> watched directory path
QMap<int, QString> m_pathMap;
// Guards m_pathMap.
QMutex m_pathMapLock;
QSystemSemaphore m_semaphore;
};
}
#endif // INOTIFYWATCH_H

View File

@ -0,0 +1,97 @@
#include "monitor.h"
#include "file-indexer-config.h"
using namespace UkuiSearch;
/**
 * @brief Creates a monitor bound to @p scheduler.
 *
 * Opens one Database handle per index type and subscribes to the scheduler's
 * state and progress signals.
 *
 * @param scheduler Scheduler whose state and progress are observed.
 * @param parent    Optional QObject parent.
 */
Monitor::Monitor(IndexScheduler *scheduler, QObject *parent)
    : QObject(parent)
    , m_scheduler(scheduler)
    , m_basicDatabase(DataBaseType::Basic)
    , m_contentDatabase(DataBaseType::Content)
{
    // State changes are re-emitted as indexStateChanged and handled locally.
    connect(scheduler, &IndexScheduler::stateChange, this, &Monitor::indexStateChanged);
    connect(scheduler, &IndexScheduler::stateChange, this, &Monitor::onIndexStateChanged);
    // Progress reports update the cached size / progress counters.
    connect(scheduler, &IndexScheduler::process, this, &Monitor::processUpdate);
}
/**
 * @brief Returns the directories reported by FileIndexerConfig::currentIndexableDir().
 */
QStringList Monitor::getCurrentIndexPaths()
{
    return FileIndexerConfig::getInstance()->currentIndexableDir();
}
/**
 * @brief Returns the scheduler's current indexer state.
 */
IndexScheduler::IndexerState Monitor::getIndexState()
{
    return m_scheduler->getIndexState();
}
/**
 * @brief Returns the cached basic-index size (m_basicIndexSize).
 */
uint Monitor::getBasicIndexSize()
{
    return m_basicIndexSize;
}
/**
 * @brief Returns the cached content-index size (m_contentIndexSize).
 */
uint Monitor::getContentIndexSize()
{
    return m_contentIndexSize;
}
/**
 * @brief Returns the cached OCR-index size (m_ocrIndexSize).
 */
uint Monitor::getOCRIndexSize()
{
    return m_ocrIndexSize;
}
/**
 * @brief Returns the cached basic-index progress (m_basicIndexProgress).
 */
uint Monitor::getBasicIndexProgress()
{
    return m_basicIndexProgress;
}
/**
 * @brief Returns the cached content-index progress (m_contentIndexProgress).
 */
uint Monitor::getContentIndexProgress()
{
    return m_contentIndexProgress;
}
/**
 * @brief Returns the cached OCR-index progress (m_ocrIndexProgress).
 */
uint Monitor::getOCRIndexProgress()
{
    return m_ocrIndexProgress;
}
/**
 * @brief Queries the basic database for its current document count.
 */
uint Monitor::getBasicIndexDocNum()
{
    return m_basicDatabase.getIndexDocCount();
}
/**
 * @brief Queries the content database for its current document count.
 */
uint Monitor::getContentIndexDocNum()
{
    return m_contentDatabase.getIndexDocCount();
}
/**
 * @brief Publishes fresh document counts whenever the scheduler becomes idle.
 * @param state New scheduler state; anything other than Idle is ignored.
 */
void Monitor::onIndexStateChanged(IndexScheduler::IndexerState state)
{
    if(state != IndexScheduler::IndexerState::Idle) {
        return;
    }
    Q_EMIT basicIndexDocNumUpdate(m_basicDatabase.getIndexDocCount());
    Q_EMIT contentIndexDocNumUpdate(m_contentDatabase.getIndexDocCount());
}
/**
 * @brief Stores a progress report from the scheduler and notifies listeners.
 *
 * For the given index type the total is cached and announced first, then the
 * finished count.
 *
 * Fix: the OCR branch used to store `finished` in m_contentIndexProgress and
 * emit that value, so getOCRIndexProgress() never changed and the content
 * progress was overwritten by OCR reports. It now updates m_ocrIndexProgress.
 *
 * @param type     Index the report belongs to.
 * @param all      Total number of items for this index run.
 * @param finished Number of items already processed.
 */
void Monitor::processUpdate(IndexType type, uint all, uint finished)
{
    switch (type) {
    case IndexType::Basic:
        m_basicIndexSize = all;
        Q_EMIT basicIndexSizeChange(m_basicIndexSize);
        m_basicIndexProgress = finished;
        Q_EMIT basicIndexProgressUpdate(m_basicIndexProgress);
        break;
    case IndexType::Contents:
        m_contentIndexSize = all;
        Q_EMIT contentIndexSizeChange(m_contentIndexSize);
        m_contentIndexProgress = finished;
        Q_EMIT contentIndexProgressUpdate(m_contentIndexProgress);
        break;
    case IndexType::OCR:
        m_ocrIndexSize = all;
        Q_EMIT ocrIndexSizeChange(m_ocrIndexSize);
        m_ocrIndexProgress = finished;
        Q_EMIT ocrIndexProgressUpdate(m_ocrIndexProgress);
        break;
    default:
        break;
    }
}

68
libsearch/index/monitor.h Normal file
View File

@ -0,0 +1,68 @@
#ifndef MONITOR_H
#define MONITOR_H
#include <QObject>
#include "index-scheduler.h"
#include "database.h"
namespace UkuiSearch {
/**
 * @brief The Monitor class
 *
 * Exposes the indexer's state, per-index sizes, progress counters and
 * document counts as Qt properties with change notifications (the original
 * note mentions qml as the consumer).
 */
class Monitor : public QObject
{
Q_OBJECT
// Directories currently configured for indexing (no change notification).
Q_PROPERTY(QStringList currentIndexPaths READ getCurrentIndexPaths)
// Scheduler state, re-emitted from IndexScheduler::stateChange.
Q_PROPERTY(IndexScheduler::IndexerState indexState READ getIndexState NOTIFY indexStateChanged)
// Totals reported by the scheduler for each index type.
Q_PROPERTY(uint basicIndexSize READ getBasicIndexSize NOTIFY basicIndexSizeChange)
Q_PROPERTY(uint contentIndexSize READ getContentIndexSize NOTIFY contentIndexSizeChange)
Q_PROPERTY(uint ocrIndexSize READ getOCRIndexSize NOTIFY ocrIndexSizeChange)
// Finished counts reported by the scheduler for each index type.
Q_PROPERTY(uint basicIndexProgress READ getBasicIndexProgress NOTIFY basicIndexProgressUpdate)
Q_PROPERTY(uint contentIndexProgress READ getContentIndexProgress NOTIFY contentIndexProgressUpdate)
Q_PROPERTY(uint ocrIndexProgress READ getOCRIndexProgress NOTIFY ocrIndexProgressUpdate)
// Document counts read from the databases.
Q_PROPERTY(uint basicIndexDocNum READ getBasicIndexDocNum NOTIFY basicIndexDocNumUpdate)
Q_PROPERTY(uint contentIndexDocNum READ getContentIndexDocNum NOTIFY contentIndexDocNumUpdate)
public:
// scheduler: source of state/progress signals; parent: optional QObject parent.
explicit Monitor(IndexScheduler* scheduler, QObject *parent = nullptr);
QStringList getCurrentIndexPaths();
IndexScheduler::IndexerState getIndexState();
uint getBasicIndexSize();
uint getContentIndexSize();
uint getOCRIndexSize();
uint getBasicIndexProgress();
uint getContentIndexProgress();
uint getOCRIndexProgress();
uint getBasicIndexDocNum();
uint getContentIndexDocNum();
Q_SIGNALS:
void indexStateChanged(IndexScheduler::IndexerState);
void basicIndexSizeChange(uint);
void contentIndexSizeChange(uint);
void ocrIndexSizeChange(uint);
void basicIndexProgressUpdate(uint);
void contentIndexProgressUpdate(uint);
void ocrIndexProgressUpdate(uint);
void basicIndexDocNumUpdate(uint);
void contentIndexDocNumUpdate(uint);
private Q_SLOTS:
// Emits the document-count signals when the scheduler reports Idle.
void onIndexStateChanged(IndexScheduler::IndexerState);
// Caches a progress report (total / finished) for the given index type.
void processUpdate(IndexType type, uint all, uint finished);
private:
IndexScheduler *m_scheduler = nullptr;
Database m_basicDatabase;
Database m_contentDatabase;
uint m_basicIndexSize = 0;
uint m_contentIndexSize = 0;
uint m_ocrIndexSize = 0;
uint m_basicIndexProgress = 0;
uint m_contentIndexProgress = 0;
uint m_ocrIndexProgress = 0;
};
}
#endif // MONITOR_H

View File

@ -11,7 +11,7 @@ OcrObject *OcrObject::getInstance()
return m_instance;
}
void OcrObject::getTxtContent(QString &path, QString &textcontent)
void OcrObject::getTxtContent(const QString &path, QString &textcontent)
{
// m_api = new tesseract::TessBaseAPI();
// if (m_api->Init(NULL, "chi_sim")) {
@ -53,7 +53,7 @@ void OcrObject::getTxtContent(QString &path, QString &textcontent)
Pix *image = pixRead(path.toStdString().data());
if (!image) {
qDebug() << "path:" << path <<" pixRead error!";
// qDebug() << "path:" << path <<" pixRead error!";
if (api) {
api->End();
delete api;
@ -62,7 +62,9 @@ void OcrObject::getTxtContent(QString &path, QString &textcontent)
return;
}
api->SetImage(image);
textcontent = api->GetUTF8Text();
char *tmp = api->GetUTF8Text();
textcontent = QString::fromLocal8Bit(tmp);
delete [] tmp;
//qDebug() << " Text:" << textcontent;
pixDestroy(&image);
api->Clear();

View File

@ -14,7 +14,7 @@ class OcrObject : public QObject
public:
static OcrObject* getInstance();
void getTxtContent(QString &path, QString &textcontent);
void getTxtContent(const QString &path, QString &textcontent);
protected:
explicit OcrObject(QObject *parent = nullptr);

View File

@ -20,9 +20,10 @@
#include "pending-file-queue.h"
#include "file-utils.h"
#include <malloc.h>
#include "index-status-recorder.h"
using namespace UkuiSearch;
static PendingFileQueue *global_instance_pending_file_queue = nullptr;
PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent), m_semaphore(INDEX_SEM, 0, QSystemSemaphore::AccessMode::Open)
PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent)
{
this->start();
@ -67,8 +68,6 @@ PendingFileQueue::~PendingFileQueue()
m_minProcessTimer = nullptr;
}
global_instance_pending_file_queue = nullptr;
IndexGenerator::getInstance()->~IndexGenerator();
}
void PendingFileQueue::forceFinish()
@ -77,16 +76,13 @@ void PendingFileQueue::forceFinish()
this->quit();
this->wait();
processCache();
m_semaphore.release(1);
}
void PendingFileQueue::enqueue(const PendingFile &file)
{
// qDebug() << "enqueuq file: " << file.path();
m_mutex.lock();
m_enqueuetimes++;
if(m_cache.isEmpty()) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");
}
// Remove all indexs of files under a dir which is to about be deleted,but keep delete signals.
// Because our datebase need to delete those indexs one by one.
if(file.shouldRemoveIndex() && file.isDir()) {
@ -112,7 +108,6 @@ void PendingFileQueue::enqueue(const PendingFile &file)
if(!m_cacheTimer->isActive()) {
// qDebug()<<"m_cacheTimer-----start!!";
// m_cacheTimer->start();
Q_EMIT cacheTimerStart();
}
Q_EMIT minProcessTimerStart();
@ -130,41 +125,21 @@ void PendingFileQueue::enqueue(const PendingFile &file)
void PendingFileQueue::run()
{
//阻塞线程直到first-index进程结束
m_semaphore.acquire();
exec();
}
void PendingFileQueue::processCache()
{
qDebug()<< "Begin processCache!" ;
qDebug()<< "PendingFileQueue Begin processCache!" ;
QVector<PendingFile> pendingFiles;
m_mutex.lock();
qDebug() << "Events:" << m_enqueuetimes;
m_enqueuetimes = 0;
m_cache.swap(m_pendingFiles);
// m_pendingFiles = m_cache;
// m_cache.clear();
// m_cache.squeeze();
m_cache.swap(pendingFiles);
m_mutex.unlock();
qDebug() << "Current process-------------";
for(PendingFile i : m_pendingFiles) {
qDebug() << "|" << i.path();
qDebug() << "|" <<i.shouldRemoveIndex();
qDebug() << pendingFiles.size() << "Events need be processed";
if(!pendingFiles.isEmpty()) {
Q_EMIT filesUpdate(pendingFiles);
} else {
qDebug()<< "Empty, PendingFileQueue finish processCache!";
}
qDebug() << "Current process-------------";
if(m_pendingFiles.isEmpty()) {
qDebug()<< "Empty, finish processCache!";
return;
}
IndexGenerator::getInstance()->updateIndex(&m_pendingFiles);
m_mutex.lock();
if(m_cache.isEmpty()) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
}
m_mutex.unlock();
m_pendingFiles.clear();
m_pendingFiles.squeeze();
malloc_trim(0);
qDebug()<< "Finish processCache!";
return;
}

View File

@ -25,9 +25,7 @@
#include <QTimer>
#include <QThread>
#include <QMutex>
#include <QSystemSemaphore>
#include "pending-file.h"
#include "index-generator.h"
namespace UkuiSearch {
class PendingFileQueue : public QThread
@ -46,22 +44,21 @@ public:
protected:
void run() override;
Q_SIGNALS:
void cacheTimerStart();
void minProcessTimerStart();
void timerStop();
void filesUpdate(const QVector<PendingFile>&);
private:
void processCache();
explicit PendingFileQueue(QObject *parent = nullptr);
QVector<PendingFile> m_cache;
QVector<PendingFile> m_pendingFiles;
QMutex m_mutex;
QMutex m_timeoutMutex;
QSystemSemaphore m_semaphore;
QThread *m_timerThread = nullptr;
bool m_timeout = false;
int m_enqueuetimes = 0;
};

View File

@ -42,6 +42,11 @@ void PendingFile::setPath(const QString& path)
m_path = path;
}
void PendingFile::setIsDir(bool isDir)
{
m_isDir = isDir;
}
//bool PendingFile::isNewFile() const
//{
// return m_created;

View File

@ -33,12 +33,23 @@ public:
QString path() const;
void setPath(const QString& path);
QString suffix();
void setIsDir(){ m_isDir = true; }
void setIsDir(bool isDir);
void setModified() { m_modified = true; }
// void setCreated() { m_created = true; }
bool isModified() { return m_modified; }
void setCreated() { m_created = true; }
void setDeleted() { m_deleted = true; }
bool isCreated() const {return m_created;}
bool shouldRemoveIndex() const;
// bool shouldIndexContents() const;
bool isDir() const;
bool operator == (const PendingFile& rhs) const {
@ -54,7 +65,7 @@ public:
private:
QString m_path;
// bool m_created : 1;
bool m_created : 1;
bool m_deleted : 1;
bool m_modified : 1;
bool m_isDir : 1;

View File

@ -36,7 +36,25 @@
#include <QtConcurrent/QtConcurrent>
#include <QThread>
#include <QUrl>
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "search-plugin-iface.h"
#include "file-utils.h"
#include "global-settings.h"

View File

@ -36,14 +36,10 @@ UkuiSearchQDBus::~UkuiSearchQDBus() {
this->tmpSystemQDBusInterface = nullptr;
}
//一键三连
void UkuiSearchQDBus::setInotifyMaxUserWatches() {
// /proc/sys/fs/inotify/max_user_watches
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1");
// sysctl
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
// /etc/sysctl.conf
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
}
void UkuiSearchQDBus::addInotifyUserInstances(int addNum)

View File

@ -0,0 +1,178 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#include "writable-database.h"
#include <QDir>
#include <QDebug>
#include <memory>
#include "file-utils.h"
#include "index-status-recorder.h"
using namespace UkuiSearch;
static QMutex g_basicDatabaseMutex;
static QMutex g_contentDatabaseMutex;
/*
 * DATABASE_TRY(code)
 * Runs `code` (one or more statements that use m_xpDatabase) from inside a
 * WritableDatabase member function.
 *  - If m_xpDatabase is null (open() was not called or failed, or rebuild()
 *    closed the database) the code is skipped and a warning is logged instead
 *    of dereferencing a null pointer.
 *  - Any Xapian::Error thrown by `code` is logged and reported through
 *    errorRecord(); it never propagates to the caller.
 * Note: no `//` comments inside the macro body, they would swallow the line
 * continuations.
 */
#define DATABASE_TRY(code) \
    if (!m_xpDatabase) { \
        qWarning() << "Database is not opened, operation skipped:" << m_path; \
    } else { \
        try { \
            code; \
        } catch (const Xapian::Error &e) { \
            qWarning() << "Transaction error," << QString::fromStdString(e.get_description()); \
            errorRecord(); \
        } \
    }
/**
 * @brief Selects the on-disk path and the global mutex that belong to @p type
 * and locks that mutex for the whole lifetime of this object.
 *
 * The mutex is a file-level QMutex shared by every WritableDatabase of the
 * same type, so constructing a second instance of the same type blocks until
 * the first one is destroyed.
 *
 * @param type which database (Basic or Content) this wrapper operates on.
 */
WritableDatabase::WritableDatabase(const DataBaseType &type)
    : m_type(type)
{
    switch (type) {
    case DataBaseType::Basic:
        m_path = INDEX_PATH;
        m_mutex = &g_basicDatabaseMutex;
        break;
    case DataBaseType::Content:
        m_path = CONTENT_INDEX_PATH;
        m_mutex = &g_contentDatabaseMutex;
        break;
    default:
        // No path and no mutex are associated with an unknown type.
        qWarning() << "Unknown database type:" << static_cast<int>(type);
        break;
    }
    // m_mutex stays null for an unknown type; never dereference it blindly.
    if (m_mutex) {
        m_mutex->lock();
    }
}

/**
 * @brief Closes the Xapian database (if it was opened) and releases the
 * mutex taken by the constructor.
 */
WritableDatabase::~WritableDatabase()
{
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete m_xpDatabase;
    m_xpDatabase = nullptr;

    if (m_mutex) {
        m_mutex->unlock();
    }
}
bool WritableDatabase::open()
{
if(m_xpDatabase) {
return true;
}
QDir database(m_path);
if(!database.exists()) {
qDebug() << "Create basic writable database" << m_path<< database.mkpath(m_path);
}
try {
m_xpDatabase = new Xapian::WritableDatabase(m_path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
} catch(const Xapian::Error &e) {
qWarning() << "Open WritableDatabase fail!" << m_path << QString::fromStdString(e.get_description());
return false;
}
return true;
}
/**
 * @brief Closes the database and wipes its directory so it can be rebuilt.
 *
 * If the directory exists it is removed recursively; otherwise it is created.
 * The database is left closed (m_xpDatabase is null) afterwards, so open()
 * has to be called again before any other operation.
 */
void WritableDatabase::rebuild()
{
    // delete on a null pointer is a no-op.
    delete m_xpDatabase;
    m_xpDatabase = nullptr;

    QDir database(m_path);
    // The file system operations are performed outside of the qDebug()
    // stream: with QT_NO_DEBUG_OUTPUT the qDebug() expression is compiled out
    // and the directory would silently be neither removed nor created.
    if (database.exists()) {
        const bool removed = database.removeRecursively();
        qDebug() << "Dababase rebuild, remove" << m_path << removed;
    } else {
        const bool created = database.mkpath(m_path);
        qDebug() << "Dababase rebuild, create" << m_path << created;
    }
}
void WritableDatabase::beginTransation()
{
DATABASE_TRY(m_xpDatabase->begin_transaction();)
}
void WritableDatabase::endTransation()
{
DATABASE_TRY(m_xpDatabase->commit_transaction();)
}
void WritableDatabase::cancelTransation()
{
DATABASE_TRY(m_xpDatabase->cancel_transaction();)
}
void WritableDatabase::commit()
{
DATABASE_TRY(m_xpDatabase->commit();)
}
void WritableDatabase::addDocument(const Document &doc)
{
DATABASE_TRY(m_xpDatabase->replace_document(doc.getUniqueTerm(), doc.getXapianDocument());\
m_xpDatabase->set_metadata(doc.getUniqueTerm(), doc.indexTime());)
}
void WritableDatabase::removeDocument(const QString &path)
{
DATABASE_TRY(m_xpDatabase->delete_document(FileUtils::makeDocUterm(path));)
}
void WritableDatabase::removeDocument(const std::string uniqueTerm)
{
DATABASE_TRY(m_xpDatabase->delete_document(uniqueTerm);)
}
/**
 * @brief QString convenience overload: converts both arguments with
 * toStdString() and stores them as database metadata.
 * @param key metadata key.
 * @param value metadata value.
 */
void WritableDatabase::setMetaData(const QString &key, const QString &value)
{
    // Forward to the std::string overload, which performs the guarded write.
    setMetaData(key.toStdString(), value.toStdString());
}
void WritableDatabase::setMetaData(const std::string &key, const std::string &value)
{
DATABASE_TRY(m_xpDatabase->set_metadata(key, value);)
}
const std::string WritableDatabase::getMetaData(const std::string &key)
{
std::string value;
DATABASE_TRY(value = m_xpDatabase->get_metadata(key);)
return value;
}
/**
 * @brief Collects every metadata entry of the database into a map.
 *
 * All metadata keys are enumerated, so the result contains the index times
 * written by addDocument() as well as anything stored through setMetaData().
 * If a Xapian::Error is thrown part-way, the entries gathered so far are
 * returned.
 *
 * @return map from metadata key to metadata value.
 */
QMap<std::string, std::string> WritableDatabase::getIndexTimes()
{
    QMap<std::string, std::string> indexTimes;
    DATABASE_TRY(
        Xapian::TermIterator key = m_xpDatabase->metadata_keys_begin();
        const Xapian::TermIterator keysEnd = m_xpDatabase->metadata_keys_end();
        while (key != keysEnd) {
            indexTimes.insert(*key, m_xpDatabase->get_metadata(*key));
            ++key;
        }
    )
    return indexTimes;
}
/**
 * @brief Marks this database as being in the Error state in the
 * IndexStatusRecorder, using the status key that matches m_type.
 * Nothing is recorded for a type other than Basic or Content.
 */
void WritableDatabase::errorRecord()
{
    if (m_type == DataBaseType::Basic) {
        IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, IndexStatusRecorder::State::Error);
    } else if (m_type == DataBaseType::Content) {
        IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, IndexStatusRecorder::State::Error);
    }
}

View File

@ -0,0 +1,64 @@
/*
* Copyright (C) 2022, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: iaom <zhangpengfei@kylinos.cn>
*
*/
#ifndef DATABASE_H
#define DATABASE_H
#include <QMutex>
#include <xapian.h>
#include "document.h"
#include "common.h"
namespace UkuiSearch {
/**
 * @brief The WritableDatabase class
 * Wrapper for Xapian::WritableDatabase.
 *
 * An instance is bound to one DataBaseType. The object owns the raw
 * Xapian::WritableDatabase pointer and holds a mutex pointer, so it must
 * never be copied: both copy construction and copy assignment are deleted.
 * open() has to succeed before any other operation is used.
 */
class WritableDatabase
{
    friend class Transaction;
public:
    /** @param type database (Basic or Content) this wrapper operates on. */
    WritableDatabase(const DataBaseType &type);
    ~WritableDatabase();

    /** @return true if the database is open after the call. */
    bool open();
    /** Closes the database and clears its directory; call open() again afterwards. */
    void rebuild();

    // Transaction control ("Transation" spelling kept for API compatibility).
    void beginTransation();
    void endTransation();
    void cancelTransation();
    void commit();

    // Document operations.
    void addDocument(const Document& doc);
    void removeDocument(const QString& path);
    void removeDocument(const std::string uniqueTerm);

    // Database metadata access.
    void setMetaData(const QString& key, const QString& value);
    void setMetaData(const std::string& key, const std::string& value);
    const std::string getMetaData(const std::string &key);
    /** @return every metadata key of the database mapped to its value. */
    QMap<std::string, std::string> getIndexTimes();

private:
    // Non-copyable: a copy would delete m_xpDatabase twice and unlock the
    // mutex twice.
    WritableDatabase(const WritableDatabase& rhs) = delete;
    WritableDatabase& operator=(const WritableDatabase& rhs) = delete;

    /** Reports a database error to the IndexStatusRecorder. */
    void errorRecord();

    DataBaseType m_type;
    QString m_path;
    Xapian::WritableDatabase* m_xpDatabase = nullptr;
    QMutex *m_mutex = nullptr;
};
}
#endif // DATABASE_H

View File

@ -27,8 +27,6 @@
#include "plugininterface/search-plugin-iface.h"
#include "plugininterface/data-queue.h"
#include "index/file-index-manager.h"
#include "index/first-index.h"
#include "index/ukui-search-qdbus.h"
#include "index/search-manager.h"

View File

@ -4745,12 +4745,13 @@ bool bCreateSmallBlockList(ULONG ulStartblock, const ULONG *aulBBD, size_t tBBDL
if(tSmallBlockListLen == 0) {
/* There is no small block list */
aulSmallBlockList = NULL;
return true;
}
/* Create the small block list */
tSize = tSmallBlockListLen * sizeof(ULONG);
xfree(aulSmallBlockList);
aulSmallBlockList = NULL;
aulSmallBlockList = (ULONG*)xmalloc(tSize);
for(iIndex = 0, ulTmp = ulStartblock;
iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
@ -4979,7 +4980,7 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
} else {
//need more format document
ptaucBytes = (UCHAR*)xfree((void*)ptaucBytes);
qWarning() << "Parser error:" << m_strFileName;
// qWarning() << "Parser error:" << m_strFileName;
// content.append(QString::fromStdString((char*)ptaucBytes).replace("\r",""));
// ptaucBytes = (UCHAR*)xfree((void*)ptaucBytes);
}
@ -5057,15 +5058,19 @@ int KBinaryParser:: readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info,
if(usOthTxtLen > 0) {
memset(chTemp, 0, MAX_BUFF_SIZE);
if(readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0)
if(readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0) {
chData = (UCHAR*)xfree((void*)chData);
return -1;
}
memcpy(chData + usIdf, chTemp, usOthTxtLen);
}
if(bTemp)
usPartLen --;
} else {
if(readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0)
if(readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0) {
chData = (UCHAR*)xfree((void*)chData);
break;
}
}
if(eRrd.bUni) {
@ -5204,8 +5209,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
iToGo -= 127;
}
if(!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen))
if(!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
xfree(aulBbdList);
xfree(aulBBD);
return -1;
}
aulBbdList = (ULONG*)xfree(aulBbdList);
/* Small Block Depot */
@ -5217,14 +5225,19 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
iIndex++, ulTmp = aulBBD[ulTmp]) {
if(ulTmp >= (ULONG)tBBDLen) {
qWarning("The Big Block Depot is damaged");
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
aulSbdList[iIndex] = ulTmp;
}
if(!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen))
if(!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
aulSbdList = (ULONG*)xfree(aulSbdList);
@ -5232,26 +5245,38 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
for(tRootListLen = 0, ulTmp = ulRootStartblock;
tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
tRootListLen++, ulTmp = aulBBD[ulTmp]) {
if(ulTmp >= (ULONG)tBBDLen)
if(ulTmp >= (ULONG)tBBDLen) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
}
if(tRootListLen == 0)
if(tRootListLen == 0) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
aulRootList = (ULONG*)xcalloc(tRootListLen, sizeof(ULONG));
for(iIndex = 0, ulTmp = ulRootStartblock;
iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
iIndex++, ulTmp = aulBBD[ulTmp]) {
if(ulTmp >= (ULONG)tBBDLen)
if(ulTmp >= (ULONG)tBBDLen) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
aulRootList[iIndex] = ulTmp;
}
bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
aulRootList = (ULONG*)xfree(aulRootList);
if(!bSuccess)
if(!bSuccess) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
rdPara readParam;
readParam.pFile = pFile;
@ -5262,8 +5287,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
readParam.ulStBlk = PPS_info.tWordDocument.ulSB;
UCHAR aucHeader[HEADER_SIZE];
/* Small block list */
if(!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen))
if(!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
aulSBD = (ULONG*)xfree(aulSBD);
readParam.ulBBd = (ULONG*)xfree(readParam.ulBBd);
return -1;
}
if(PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
readParam.ulBBd = aulSBD;
@ -5271,8 +5299,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
readParam.usBlkSize = SMALL_BLOCK_SIZE;
}
if(readData(readParam, aucHeader, 0, HEADER_SIZE) != 0)
if(readData(readParam, aucHeader, 0, HEADER_SIZE) != 0) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
usIdent = usGetWord(0x00, aucHeader);
@ -5281,8 +5312,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
usIdent != 0xa5dc && /* Word 6 & 7 */
usIdent != 0xa5ec && /* Word 7 & 97 & 98 */
usIdent != 0xa697 && /* Word 7 for oriental languages */
usIdent != 0xa699) /* Word 7 for oriental languages */
usIdent != 0xa699) { /* Word 7 for oriental languages */
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
/* Get the status flags from the header */
usDocStatus = usGetWord(0x0a, aucHeader);
@ -5299,8 +5333,11 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
readParam.ulStBlk = PPS_info.tWorkBook.ulSB;
UCHAR aucHeader[4];
ulong ulOff = 0;
if(readData(readParam, aucHeader, 0, 8) != 0)
if(readData(readParam, aucHeader, 0, 8) != 0) {
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return -1;
}
ulOff += 4;
USHORT usType = usGetWord(0x00, aucHeader);
@ -5325,6 +5362,8 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) {
} else {
qWarning() << "Unsupport doc type:" << m_strFileName;
}
aulSBD = (ULONG*)xfree(aulSBD);
aulBBD = (ULONG*)xfree(aulBBD);
return 0;
}
@ -5347,7 +5386,10 @@ KBinaryParser::KBinaryParser(QObject *parent)
}
KBinaryParser::~KBinaryParser()
{}
{
xfree(aulSmallBlockList);
aulSmallBlockList = NULL;
}
bool KBinaryParser::RunParser(QString strFile, QString &content) {
FILE* pFile = fopen(strFile.toLocal8Bit().data(), "rb");

View File

@ -12,8 +12,6 @@ class ResultItem;
class SearchControllerPrivate;
/*
*ID
*
*使
*/
class SearchController
{

View File

@ -3,7 +3,7 @@
using namespace UkuiSearch;
FileSystemWatcherTest::FileSystemWatcherTest(QObject *parent) : QObject(parent)
{
m_watcher = new FileSystemWatcher(FileSystemWatcher::WatchEvents(FileSystemWatcher::EventMove | FileSystemWatcher::EventMoveSelf |
m_watcher = new FileSystemWatcher(true, FileSystemWatcher::WatchEvents(FileSystemWatcher::EventMove | FileSystemWatcher::EventMoveSelf |
FileSystemWatcher::EventCreate | FileSystemWatcher::EventDelete |
FileSystemWatcher::EventDeleteSelf | FileSystemWatcher::EventUnmount |
FileSystemWatcher::EventModify | FileSystemWatcher::EventAttributeChange));
@ -11,18 +11,23 @@ FileSystemWatcherTest::FileSystemWatcherTest(QObject *parent) : QObject(parent)
void FileSystemWatcherTest::beginSignalTest()
{
m_watcher->addWatch("/home/zpf/图片");
m_watcher->addWatch("/home/zpf/图片/新建文夹");
connect(m_watcher, &FileSystemWatcher::attributeChanged,
[](const QString& fileUrl) { qDebug() << "AttrbuteChanged:" << fileUrl; });
connect(m_watcher, &FileSystemWatcher::created,
[](const QString& fileUrl, bool isDir) { qDebug() << "Created:" << fileUrl << isDir; });
connect(m_watcher, &FileSystemWatcher::deleted,
[](const QString& fileUrl, bool isDir) { qDebug() << "Deleted:" << fileUrl << isDir; });
connect(m_watcher, &FileSystemWatcher::modified,
[](const QString& fileUrl) { qDebug() << "Modified:" << fileUrl; });
connect(m_watcher, &FileSystemWatcher::moved,
[](const QString& fileUrl, bool isDir) { qDebug() << "Modified:" << fileUrl << isDir; });
[](const QString& fileUrl, bool isDir) { qDebug() << "moved:" << fileUrl << isDir; });
connect(m_watcher, &FileSystemWatcher::closedWrite,
[](const QString& fileUrl) { qDebug() << "ClosedWrite:" << fileUrl; });
}

View File

@ -0,0 +1,6 @@
import QtQuick 2.0
import QtQuick.Window 2.12
// Top-level window component; currently an empty placeholder with no
// properties or child items.
// NOTE(review): presumably IndexMonitor.qml, the index monitor window loaded
// by the search service - confirm.
Window {
}

View File

@ -0,0 +1,8 @@
import QtQuick 2.0
import QtQuick.Controls 2.5
// Item wrapping a single ProgressBar with default properties; no bindings
// are defined yet.
// NOTE(review): presumably IndexProgressBar.qml - confirm.
Item {
ProgressBar {
}
}

View File

@ -0,0 +1,8 @@
# Compile every .qml file of this directory into the application's Qt
# resources (qrc), and list the files so they show up in the IDE project tree.
qmlFile.files = $$files($$PWD/*.qml)
RESOURCES += qmlFile
DISTFILES += \
$$PWD/IndexMonitor.qml \
$$PWD/IndexProgressBar.qml

View File

@ -1,11 +1,14 @@
#include <QDebug>
#include <QDBusConnection>
#include <QQmlContext>
#include "ukui-search-service.h"
#include "dir-watcher.h"
#include "common.h"
#include <QDBusConnection>
#include "file-utils.h"
#include "file-indexer-config.h"
using namespace UkuiSearch;
UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &applicationName): QtSingleApplication (applicationName, argc, argv)
UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &applicationName)
: QtSingleApplication (applicationName, argc, argv)
{
qDebug()<<"ukui search service constructor start";
setApplicationVersion(QString("v%1").arg(VERSION));
@ -15,9 +18,9 @@ UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &app
connect(this, &QtSingleApplication::messageReceived, [=](QString msg) {
this->parseCmd(msg, true);
});
DirWatcher::getDirWatcher();
initGsettings();
FileIndexManager::getInstance()->initIndexPathSetFunction();
qRegisterMetaType<IndexType>("IndexType");
m_indexScheduler = new IndexScheduler(this);
DirWatcher::getDirWatcher()->initDbusService();
}
//parse cmd
@ -28,6 +31,14 @@ UkuiSearchService::UkuiSearchService(int &argc, char *argv[], const QString &app
qDebug()<<"ukui search service constructor end";
}
/**
 * @brief Destroys the monitor window if it was created.
 */
UkuiSearchService::~UkuiSearchService()
{
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete m_quickView;
    m_quickView = nullptr;
}
void UkuiSearchService::parseCmd(QString msg, bool isPrimary)
{
QCommandLineParser parser;
@ -41,6 +52,9 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary)
QCommandLineOption startOption(QStringList()<<"i"<<"index", tr("start or stop file index"), "option");
parser.addOption(startOption);
QCommandLineOption monitorWindow(QStringList()<<"m"<<"monitor", tr("Show index monitor window"));
parser.addOption(monitorWindow);
// QCommandLineOption statusOption(QStringList()<<"s"<<"status", tr("show status of file index service"));
// parser.addOption(statusOption);
@ -50,13 +64,17 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary)
if(parser.isSet(startOption)) {
qDebug() << "options!!!!" << parser.value(startOption);
if(parser.value(startOption) == "start") {
indexServiceSwitch(true);
m_indexScheduler->scheduleIndexing();
} else if (parser.value(startOption) == "stop") {
indexServiceSwitch(false);
m_indexScheduler->stop();
}
}
// if(parser.isSet(statusOption)) {
// }
if (parser.isSet(monitorWindow)) {
loadMonitorWindow();
m_quickView->show();
return;
}
if (parser.isSet(quitOption)) {
qApp->quit();
return;
@ -71,54 +89,11 @@ void UkuiSearchService::parseCmd(QString msg, bool isPrimary)
}
}
void UkuiSearchService::initGsettings()
void UkuiSearchService::loadMonitorWindow()
{
const QByteArray id(UKUI_SEARCH_SCHEMAS);
if(QGSettings::isSchemaInstalled(id)) {
m_SearchGsettings = new QGSettings(id);
connect(m_SearchGsettings, &QGSettings::changed, this, [ = ](const QString &key) {
if(key == SEARCH_METHOD_KEY) {
setSearchMethodByGsettings();
}
});
if(m_SearchGsettings->keys().contains(SEARCH_METHOD_KEY)) {
setSearchMethodByGsettings();
}
} else {
qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!";
}
}
void UkuiSearchService::setSearchMethodByGsettings()
{
bool isIndexSearch = m_SearchGsettings->get(SEARCH_METHOD_KEY).toBool();
if(isIndexSearch) {
FileUtils::searchMethod = FileUtils::SearchMethod::INDEXSEARCH;
} else {
FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH;
}
FileIndexManager::getInstance()->searchMethod(FileUtils::searchMethod);
}
void UkuiSearchService::indexServiceSwitch(bool startIndex)
{
if(startIndex) {
FileUtils::searchMethod = FileUtils::SearchMethod::INDEXSEARCH;
} else {
FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH;
}
FileIndexManager::getInstance()->searchMethod(FileUtils::searchMethod);
const QByteArray id(UKUI_SEARCH_SCHEMAS);
if(QGSettings::isSchemaInstalled(id)) {
m_SearchGsettings = new QGSettings(id);
if(m_SearchGsettings->keys().contains(SEARCH_METHOD_KEY)) {
m_SearchGsettings->set(SEARCH_METHOD_KEY, startIndex);
} else {
qWarning() << SEARCH_METHOD_KEY << " is not found!";
}
} else {
qWarning() << UKUI_SEARCH_SCHEMAS << " is not found!";
if(!m_quickView) {
m_quickView = new QQuickView();
m_quickView->rootContext()->setContextProperty("monitor", m_monitor);
m_quickView->setSource(m_qmlPath);
}
}

View File

@ -3,10 +3,13 @@
#include <QObject>
#include <QCommandLineParser>
#include <QGSettings/QGSettings>
#include <QGSettings>
#include <QQuickView>
#include "qtsingleapplication.h"
#include "file-index-manager.h"
#include "common.h"
#include "index-scheduler.h"
#include "monitor.h"
namespace UkuiSearch {
class UkuiSearchService : public QtSingleApplication
@ -14,14 +17,18 @@ class UkuiSearchService : public QtSingleApplication
Q_OBJECT
public:
UkuiSearchService(int &argc, char *argv[], const QString &applicationName = "ukui-search-service");
~UkuiSearchService();
protected Q_SLOTS:
void parseCmd(QString msg, bool isPrimary);
private:
void initGsettings();
void setSearchMethodByGsettings();
void indexServiceSwitch(bool startIndex = true);
QGSettings *m_SearchGsettings;
void loadMonitorWindow();
IndexScheduler *m_indexScheduler = nullptr;
Monitor *m_monitor = nullptr;
QQuickView *m_quickView = nullptr;
QString m_qmlPath = "qrc:/qml/IndexMonitor.qml";
};
}
#endif // UKUISEARCHSERVICE_H

View File

@ -1,4 +1,4 @@
QT += core gui dbus
QT += core gui dbus quick
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
@ -22,6 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
include(../libsearch/libukui-search-headers.pri)
include(../3rd-parties/qtsingleapplication/qtsingleapplication.pri)
include(./qml/qml.pri)
LIBS += -L$$OUT_PWD/../libchinese-segmentation -lchinese-segmentation \
-L$$OUT_PWD/../libsearch -lukui-search

View File

@ -8,7 +8,7 @@ SUBDIRS += $$PWD/libchinese-segmentation \
$$PWD/ukui-search-app-data-service \
$$PWD/ukui-search-service-dir-manager
#SUBDIRS += tests
SUBDIRS += tests
# The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings