ukui-search/libsearch/index/first-index.cpp

332 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
//#include <QtConcurrent>
#include "first-index.h"
#include "dir-watcher.h"
#include <QDebug>
// Everything defined in this file lives in the UkuiSearch namespace.
using namespace UkuiSearch;
// Storage for the singleton pointer; it is assigned inside getInstance().
FirstIndex *FirstIndex::m_instance = nullptr;
// Flag passed to std::call_once in getInstance() so the instance is created only once.
std::once_flag g_firstIndexInstanceFlag;
/**
 * Constructs the indexer.
 *
 * m_semaphore is created for the INDEX_SEM key with an initial value of 1 in
 * Open access mode, and the private worker pool is configured.
 */
FirstIndex::FirstIndex()
    : m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Open)
{
    // Pool settings: expiry timeout of 100 and no more than two threads.
    m_pool.setExpiryTimeout(100);
    m_pool.setMaxThreadCount(2);
}
/**
 * Returns the process-wide FirstIndex object.
 *
 * The object is created on the first call; std::call_once together with
 * g_firstIndexInstanceFlag ensures the construction happens only once.
 *
 * @return pointer stored in m_instance.
 */
FirstIndex *FirstIndex::getInstance()
{
    const auto createInstance = []() {
        m_instance = new FirstIndex;
    };
    std::call_once(g_firstIndexInstanceFlag, createInstance);
    return m_instance;
}
/**
 * Releases the three work queues owned by this object and logs the start and
 * end of the teardown.
 */
FirstIndex::~FirstIndex()
{
    qDebug() << "~FirstIndex";

    // `delete` on a null pointer is a no-op, so no explicit null checks are needed.
    delete m_indexData;
    m_indexData = nullptr;

    delete m_contentIndexData;
    m_contentIndexData = nullptr;

    delete m_ocrIndexData;
    m_ocrIndexData = nullptr;

    qDebug() << "~FirstIndex end";
}
void FirstIndex::work(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->m_indexData->enqueue(QVector<QString>() << fileInfo.fileName()
<< fileInfo.absoluteFilePath()
<< QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")
<< fileInfo.lastModified().toString("yyyyMMddHHmmss"));
if (fileInfo.fileName().split(".", QString::SkipEmptyParts).length() < 2)
return;
if (true == targetFileTypeMap[fileInfo.fileName().split(".").last()]
and false == FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath())) {
if (fileInfo.fileName().split(".").last() == "docx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
file.close();
} else if (fileInfo.fileName().split(".").last() == "pptx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
qint64 fileSize(0);
qint64 fileIndex(0);
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix)){
QString name = prefix + QString::number(fileIndex + 1) + ".xml";
fileIndex++;
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileSize += fileR.usize();
}
}
file.close();
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
} else if (fileInfo.fileName().split(".").last() == "xlsx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
file.close();
} else {
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
}
} else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) {
if (FileUtils::isOcrSupportSize(fileInfo.absoluteFilePath())) {
this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
//this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
}
}
}
void FirstIndex::addIndexPath(const QString path, const QStringList blockList)
{
m_semaphore.acquire();
setPath(QStringList() << path);
setBlockPath(blockList);
this->wait();
this->start();
}
/**
 * Thread body that (re)builds the index databases.
 *
 * Steps:
 *  1. Read the recorded states of the basic index, the content index and the
 *     inotify exit flag; the value "2" turns the matching member flag on.
 *  2. If all three flags are on and this is the first run, there is nothing to
 *     do: clear m_isFirstIndex, release m_semaphore and return. If any flag is
 *     off, the path and block lists are reloaded from DirWatcher; otherwise the
 *     lists set beforehand (see addIndexPath) are used.
 *  3. Set INOTIFY_NORMAL_EXIT to "0", fork a child that traverses the paths
 *     and builds both databases on m_pool, and wait for the child in the parent.
 *  4. Set INOTIFY_NORMAL_EXIT to "2", clear m_isFirstIndex and release
 *     m_semaphore.
 *
 * Differences from the previous implementation:
 *  - FileUtils::indexStatus is decremented again when fork() fails; before, a
 *    failed fork left the counter raised.
 *  - Queues left over from an earlier run are freed before new ones are
 *    allocated, so repeated runs no longer leak them in the parent process.
 */
void FirstIndex::run() {
    QTime t1 = QTime::currentTime();

    QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
    QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
    // QString ocrIndexDatabaseStatus = IndexStatusRecorder::getInstance()->getStatus(OCR_DATABASE_STATE).toString();
    QString inotifyIndexStatus = IndexStatusRecorder::getInstance()->getStatus(INOTIFY_NORMAL_EXIT).toString();
    qInfo() << "indexDataBaseStatus: " << indexDataBaseStatus;
    qInfo() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus;
    // qInfo() << "ocrIndexDatabaseStatus: " << ocrIndexDatabaseStatus;
    qInfo() << "inotifyIndexStatus: " << inotifyIndexStatus;

    m_allDatadaseStatus = inotifyIndexStatus == "2" ? true : false;
    m_indexDatabaseStatus = indexDataBaseStatus == "2" ? true : false;
    m_contentIndexDatabaseStatus = contentIndexDataBaseStatus == "2" ? true : false;
    // m_ocrIndexDatabaseStatus = ocrIndexDatabaseStatus == "2" ? true : false;

    if(m_allDatadaseStatus && m_indexDatabaseStatus && m_contentIndexDatabaseStatus /*&& m_ocrIndexDatabaseStatus*/) {
        if(m_isFirstIndex) {
            // Everything is flagged as up to date on the first run: skip indexing.
            m_isFirstIndex = false;
            m_semaphore.release(1);
            return;
        }
    } else {
        setPath(DirWatcher::getDirWatcher()->currentIndexableDir());
        setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex());
    }
    IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");

    // Free queues left over from a previous run before allocating new ones.
    delete this->m_indexData;
    this->m_indexData = new QQueue<QVector<QString>>();
    delete this->m_contentIndexData;
    this->m_contentIndexData = new QQueue<QPair<QString,qint64>>();
    // this->m_ocrIndexData = new QQueue<QPair<QString,qint64>>();

    ++FileUtils::indexStatus;
    pid_t pid;
    pid = fork();
    if(pid == 0) {
        // Child process: ask for SIGTERM when the parent dies, and rename the process.
        prctl(PR_SET_PDEATHSIG, SIGTERM);
        prctl(PR_SET_NAME, "first-index");

        // Completion handshake: the semaphore holds 5 permits and each worker
        // takes 2, so the acquire(5) further down cannot succeed until both
        // workers have released theirs. mutex1/mutex2 are locked here and
        // unlocked by a worker right after its acquire(2); re-locking them
        // below therefore blocks until both workers hold their permits.
        // NOTE(review): the workers unlock mutexes that this thread locked;
        // Qt documents unlocking from a different thread as an error, so this
        // handshake should be revisited.
        QSemaphore sem(5);
        QMutex mutex1, mutex2, mutex3;
        mutex1.lock();
        mutex2.lock();
        // mutex3.lock();

        //FIXME: using the same D-Bus interface in the child process as in the parent causes problems.
        // qInfo() << "index dir" << DirWatcher::getDirWatcher()->currentIndexableDir();
        // qInfo() << "index block dir" << DirWatcher::getDirWatcher()->currentBlackListOfIndex();
        qInfo() << "index dir" << m_pathList;
        qInfo() << "index block dir" << m_blockList;

        this->Traverse();
        FileUtils::maxIndexCount = this->m_indexData->length();
        qDebug() << "max_index_count:" << FileUtils::maxIndexCount;

        // Worker 1: rebuild the basic index in batches of up to 8192 entries.
        QtConcurrent::run(&m_pool, [&]() {
            sem.acquire(2);
            mutex1.unlock();
            if(m_isFirstIndex && m_allDatadaseStatus && m_indexDatabaseStatus) {
                sem.release(2);
                return;
            }
            qDebug() << "index start;" << m_indexData->size();
            IndexGenerator::getInstance()->rebuildIndexDatabase();
            QQueue<QVector<QString>> batch;
            bool success = true;
            while(!this->m_indexData->empty()) {
                for(size_t i = 0; (i < 8192) && (!this->m_indexData->empty()); ++i) {
                    batch.enqueue(this->m_indexData->dequeue());
                }
                if(!IndexGenerator::getInstance()->creatAllIndex(&batch)) {
                    success = false;
                    break;
                }
                batch.clear();
            }
            qDebug() << "index end;";
            if(success) {
                IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "2");
            }
            sem.release(2);
        });

        // Worker 2: rebuild the content index in size-limited batches.
        QtConcurrent::run(&m_pool,[&]() {
            sem.acquire(2);
            mutex2.unlock();
            if(m_isFirstIndex && m_allDatadaseStatus && m_contentIndexDatabaseStatus) {
                sem.release(2);
                return;
            }
            qDebug() << "content index start:" << m_contentIndexData->size();
            IndexGenerator::getInstance()->rebuildContentIndexDatabase();
            QQueue<QString> batch;
            bool success = true;
            while(!this->m_contentIndexData->empty()) {
                qint64 fileSize = 0;
                // Batch size changed from 30 files to a total file size under 50M;
                // 50M is a provisional value --jxx20210519
                while(!this->m_contentIndexData->empty()) {
                    QPair<QString,qint64> tempPair = this->m_contentIndexData->dequeue();
                    fileSize += tempPair.second;
                    if (fileSize > 52428800 ) {
                        if (batch.size() == 0) {
                            // A single oversized file still forms a batch of its own.
                            batch.enqueue(tempPair.first);
                            break;
                        }
                        // The batch is full: put the file back for a later batch.
                        this->m_contentIndexData->enqueue(tempPair);
                        break;
                    }
                    batch.enqueue(tempPair.first);
                }
                if(!IndexGenerator::getInstance()->creatAllIndex(&batch)) {
                    success = false;
                    break;
                }
                batch.clear();
            }
            qDebug() << "content index end;";
            if(success) {
                IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "2");
            }
            sem.release(2);
        });

        // OCR is currently merged into the content-search category.
        // QtConcurrent::run(&m_pool,[&]() {
        // sem.acquire(5);
        // mutex3.unlock();
        // QQueue<QString>* tmpOcr = new QQueue<QString>();
        // qDebug() << "m_ocr_index:" << m_ocr_index->size();
        // if(m_isFirstIndex && m_allDatadaseStatus && m_contentIndexDatabaseStatus) {
        // sem.release(2);
        // return;
        // }
        // IndexGenerator::getInstance()->rebuildOcrIndexDatabase();
        // bool sucess = true;
        // while(!this->m_ocr_index->empty()) {
        // qint64 fileSize = 0;
        // // Total file size handled per batch is kept under 50M; 50M is a provisional value
        // for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_ocr_index->empty()); ++i) {
        // QPair<QString,qint64> tempPair = this->m_ocr_index->dequeue();
        // fileSize += tempPair.second;
        // if (fileSize > 52428800) {
        // if (tmpOcr->size() == 0) {
        // tmpOcr->enqueue(tempPair.first);
        // break;
        // }
        // this->m_ocr_index->enqueue(tempPair);
        // break;
        // }
        // tmpOcr->enqueue(tempPair.first);
        // }
        // if(!IndexGenerator::getInstance()->creatAllIndex(tmpOcr)) {
        // sucess = false;
        // break;
        // }
        // tmpOcr->clear();
        // }
        // delete tmpOcr;
        // qDebug() << "OCR index end;";
        // if(sucess) {
        // IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "2");
        // }
        // sem.release(5);
        // });

        // Wait until both workers hold their permits, then until both released them.
        mutex1.lock();
        mutex2.lock();
        // mutex3.lock();
        sem.acquire(5);
        mutex1.unlock();
        mutex2.unlock();
        // mutex3.unlock();

        delete this->m_indexData;
        this->m_indexData = nullptr;
        delete this->m_contentIndexData;
        this->m_contentIndexData = nullptr;
        delete this->m_ocrIndexData;
        this->m_ocrIndexData = nullptr;
        ::_exit(0);
    } else if(pid < 0) {
        qWarning() << "First Index fork error!!";
        // No child is running, so undo the increment made before fork().
        --FileUtils::indexStatus;
    } else {
        waitpid(pid, NULL, 0);
        --FileUtils::indexStatus;
    }

    // Set to false after the first index; later start() calls build the index
    // for newly added index directories.
    m_isFirstIndex = false;
    IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
    m_semaphore.release(1);
    // int retval1 = write(fifo_fd, buffer, strlen(buffer));
    // if(retval1 == -1) {
    // qWarning("write error\n");
    // }
    // qDebug("write data ok!\n");

    // Log the start and end time of this run.
    QTime t2 = QTime::currentTime();
    qWarning() << t1;
    qWarning() << t2;
    return;
}