/* * Copyright (C) 2020, KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * Authors: zhangzihao * Modified by: zhangpengfei * */ //#include #include "first-index.h" #include "dir-watcher.h" #include /*需要重构: *支持新建或重建指定目录索引 *支持判断所有数据库状态,根据状态判断是否单独重建某个数据库。 *支持自定义增加索引目录。 */ using namespace UkuiSearch; FirstIndex *FirstIndex::m_instance = nullptr; std::once_flag g_firstIndexInstanceFlag; FirstIndex::FirstIndex() : m_semaphore(INDEX_SEM, 1, QSystemSemaphore::AccessMode::Open) { m_pool.setMaxThreadCount(2); m_pool.setExpiryTimeout(100); connect(this, &FirstIndex::needRebuild, this, &FirstIndex::rebuildDatebase, Qt::QueuedConnection); } FirstIndex *FirstIndex::getInstance() { std::call_once(g_firstIndexInstanceFlag, [] () { m_instance = new FirstIndex; }); return m_instance; } FirstIndex::~FirstIndex() { qDebug() << "~FirstIndex"; if(this->m_indexData) delete this->m_indexData; this->m_indexData = nullptr; if(this->m_contentIndexData) delete this->m_contentIndexData; this->m_contentIndexData = nullptr; if(this->m_ocrIndexData) delete this->m_ocrIndexData; this->m_ocrIndexData = nullptr; qDebug() << "~FirstIndex end"; } void FirstIndex::work(const QFileInfo& fileInfo) { // qDebug() << "there are some shit here"<m_indexData->enqueue(QVector() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0") << fileInfo.lastModified().toString("yyyyMMddHHmmss")); if (fileInfo.fileName().split(".", QString::SkipEmptyParts).length() < 2) return; if (true == targetFileTypeMap[fileInfo.fileName().split(".").last()] and false == FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath())) { if (fileInfo.fileName().split(".").last() == "docx") { QuaZip file(fileInfo.absoluteFilePath()); if(!file.open(QuaZip::mdUnzip)) return; if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) return; QuaZipFile fileR(&file); this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小 file.close(); } else if (fileInfo.fileName().split(".").last() == "pptx") { QuaZip file(fileInfo.absoluteFilePath()); if(!file.open(QuaZip::mdUnzip)) return; QString prefix("ppt/slides/slide"); qint64 fileSize(0); qint64 fileIndex(0); for(QString i : file.getFileNameList()) { if(i.startsWith(prefix)){ QString name = prefix + QString::number(fileIndex + 1) + ".xml"; fileIndex++; if(!file.setCurrentFile(name)) { continue; } QuaZipFile fileR(&file); fileSize += fileR.usize(); } } file.close(); this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小 } else if (fileInfo.fileName().split(".").last() == "xlsx") { QuaZip file(fileInfo.absoluteFilePath()); if(!file.open(QuaZip::mdUnzip)) return; if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) return; QuaZipFile fileR(&file); this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小 file.close(); } else { this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); } } else if (true == targetPhotographTypeMap[fileInfo.fileName().split(".").last()]) { if (FileUtils::isOcrSupportSize(fileInfo.absoluteFilePath())) { this->m_contentIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); //this->m_ocrIndexData->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); } } } void FirstIndex::rebuildDatebase() { m_semaphore.acquire(); m_isRebuildProcess = true; this->wait(); this->start(); } void FirstIndex::addIndexPath(const QString path, const QStringList blockList) { m_semaphore.acquire(); m_isRebuildProcess = false; setPath(QStringList() << path); setBlockPath(blockList); this->wait(); this->start(); } void FirstIndex::run() { QTime t1 = QTime::currentTime(); QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString(); QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString(); // QString ocrIndexDatabaseStatus = IndexStatusRecorder::getInstance()->getStatus(OCR_DATABASE_STATE).toString(); QString inotifyIndexStatus = IndexStatusRecorder::getInstance()->getStatus(INOTIFY_NORMAL_EXIT).toString(); qInfo() << "indexDataBaseStatus: " << indexDataBaseStatus; qInfo() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus; // qInfo() << "ocrIndexDatabaseStatus: " << ocrIndexDatabaseStatus; qInfo() << "inotifyIndexStatus: " << inotifyIndexStatus; m_inotifyIndexStatus = inotifyIndexStatus == "2" ? true : false; m_indexDatabaseStatus = indexDataBaseStatus == "2" ? true : false; m_contentIndexDatabaseStatus = contentIndexDataBaseStatus == "2" ? true : false; // m_ocrIndexDatabaseStatus = ocrIndexDatabaseStatus == "2" ? true : false; if(m_inotifyIndexStatus && m_indexDatabaseStatus && m_contentIndexDatabaseStatus /*&& m_ocrIndexDatabaseStatus*/) { m_needRebuild = false; if(m_isRebuildProcess) { m_isRebuildProcess = false; m_semaphore.release(1); return; } } else { if(m_isRebuildProcess) { setPath(DirWatcher::getDirWatcher()->currentIndexableDir()); setBlockPath(DirWatcher::getDirWatcher()->currentBlackListOfIndex()); } else { if(m_inotifyIndexStatus && (!m_indexDatabaseStatus || !m_contentIndexDatabaseStatus)) { m_needRebuild = true; } if(!m_inotifyIndexStatus || (!m_indexDatabaseStatus && !m_contentIndexDatabaseStatus)) { m_needRebuild = false; qInfo() << "Entering rebuild procedure"; Q_EMIT needRebuild(); m_semaphore.release(1); return; } } } IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0"); this->m_indexData = new QQueue>(); this->m_contentIndexData = new QQueue>(); // this->m_ocrIndexData = new QQueue>(); ++FileUtils::indexStatus; pid_t pid; pid = fork(); if(pid == 0) { prctl(PR_SET_PDEATHSIG, SIGTERM); prctl(PR_SET_NAME, "first-index"); QSemaphore sem(5); QMutex mutex1, mutex2, mutex3; mutex1.lock(); mutex2.lock(); // mutex3.lock(); //FIXME:在子进程里使用和父进程同样的dbus接口会出问题。 // qInfo() << "index dir" << DirWatcher::getDirWatcher()->currentIndexableDir(); // qInfo() << "index block dir" << DirWatcher::getDirWatcher()->currentBlackListOfIndex(); qInfo() << "index dir" << m_pathList; qInfo() << "index block dir" << m_blockList; this->Traverse(); FileUtils::maxIndexCount = this->m_indexData->length(); qDebug() << "max_index_count:" << FileUtils::maxIndexCount; QtConcurrent::run(&m_pool, [&]() { sem.acquire(2); mutex1.unlock(); if(m_isRebuildProcess && m_inotifyIndexStatus && m_indexDatabaseStatus) { //重建索引且无异常 sem.release(2); return; } else if(m_isRebuildProcess) { //重建索引且有异常 IndexGenerator::getInstance()->rebuildIndexDatabase(); } else if(!m_inotifyIndexStatus || !m_indexDatabaseStatus) { //添加目录且有异常 qWarning() << "Index database need rebuild!"; sem.release(2); return; } qDebug() << "index start;" << m_indexData->size(); QQueue>* tmp1 = new QQueue>(); bool sucess = true; while(!this->m_indexData->empty()) { for(size_t i = 0; (i < 8192) && (!this->m_indexData->empty()); ++i) { tmp1->enqueue(this->m_indexData->dequeue()); } if(!IndexGenerator::getInstance()->creatAllIndex(tmp1)) { sucess = false; break; } tmp1->clear(); } delete tmp1; qDebug() << "index end;"; if(sucess) { IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "2"); } sem.release(2); }); QtConcurrent::run(&m_pool,[&]() { sem.acquire(2); mutex2.unlock(); if(m_isRebuildProcess && m_inotifyIndexStatus && m_contentIndexDatabaseStatus) { sem.release(2); return; } else if(m_isRebuildProcess) { //重建索引且有异常 IndexGenerator::getInstance()->rebuildContentIndexDatabase(); } else if(!m_inotifyIndexStatus || !m_contentIndexDatabaseStatus) { //添加目录且有异常 qWarning() << "Content index database need rebuild!"; sem.release(2); return; } qDebug() << "content index start:" << m_contentIndexData->size(); QQueue* tmp2 = new QQueue(); bool sucess = true; while(!this->m_contentIndexData->empty()) { qint64 fileSize = 0; //修改一次处理的数据量,从30个文件改为文件总大小为50M以下,50M为暂定值--jxx20210519 for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_contentIndexData->empty()); ++i) { QPair tempPair = this->m_contentIndexData->dequeue(); fileSize += tempPair.second; if (fileSize > 52428800 ) { if (tmp2->size() == 0) { tmp2->enqueue(tempPair.first); break; } this->m_contentIndexData->enqueue(tempPair); break; } tmp2->enqueue(tempPair.first); } if(!IndexGenerator::getInstance()->creatAllIndex(tmp2)) { sucess = false; break; } tmp2->clear(); } delete tmp2; qDebug() << "content index end;"; if(sucess) { IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "2"); } sem.release(2); }); // OCR功能目前合到内容搜索分类中 // QtConcurrent::run(&m_pool,[&]() { // sem.acquire(5); // mutex3.unlock(); // QQueue* tmpOcr = new QQueue(); // qDebug() << "m_ocr_index:" << m_ocr_index->size(); // if(m_isFirstIndex && m_allDatadaseStatus && m_contentIndexDatabaseStatus) { // sem.release(2); // return; // } // IndexGenerator::getInstance()->rebuildOcrIndexDatabase(); // bool sucess = true; // while(!this->m_ocr_index->empty()) { // qint64 fileSize = 0; // //一次处理的数据量文件总大小为50M以下,50M为暂定值 // for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->m_ocr_index->empty()); ++i) { // QPair tempPair = this->m_ocr_index->dequeue(); // fileSize += tempPair.second; // if (fileSize > 52428800) { // if (tmpOcr->size() == 0) { // tmpOcr->enqueue(tempPair.first); // break; // } // this->m_ocr_index->enqueue(tempPair); // break; // } // tmpOcr->enqueue(tempPair.first); // } // if(!IndexGenerator::getInstance()->creatAllIndex(tmpOcr)) { // sucess = false; // break; // } // tmpOcr->clear(); // } // delete tmpOcr; // qDebug() << "OCR index end;"; // if(sucess) { // IndexStatusRecorder::getInstance()->setStatus(OCR_DATABASE_STATE, "2"); // } // sem.release(5); // }); mutex1.lock(); mutex2.lock(); // mutex3.lock(); sem.acquire(5); mutex1.unlock(); mutex2.unlock(); // mutex3.unlock(); if(this->m_indexData) delete this->m_indexData; this->m_indexData = nullptr; if(this->m_contentIndexData) delete this->m_contentIndexData; this->m_contentIndexData = nullptr; if(this->m_ocrIndexData) delete this->m_ocrIndexData; this->m_ocrIndexData = nullptr; ::_exit(0); } else if(pid < 0) { qWarning() << "First Index fork error!!"; } else { waitpid(pid, NULL, 0); --FileUtils::indexStatus; } IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2"); if(m_needRebuild) { m_needRebuild = false; qInfo() << "Entering rebuild procedure"; Q_EMIT needRebuild(); } m_semaphore.release(1); // int retval1 = write(fifo_fd, buffer, strlen(buffer)); // if(retval1 == -1) { // qWarning("write error\n"); // } // qDebug("write data ok!\n"); QTime t2 = QTime::currentTime(); qWarning() << t1; qWarning() << t2; return; }