From 25bae31cff7f5daee316c5f3d7a57e904ed5083d Mon Sep 17 00:00:00 2001 From: iaom Date: Wed, 27 Oct 2021 15:16:43 +0800 Subject: [PATCH] Add processing logic for encrypted files. --- libsearch/file-utils.cpp | 33 ++++++++++++++++++++++++ libsearch/file-utils.h | 1 + libsearch/index/file-reader.cpp | 39 +++++++++++------------------ libsearch/index/first-index.cpp | 4 ++- libsearch/index/index-generator.cpp | 36 ++++++++++++++++++++++++-- libsearch/index/index-generator.h | 1 + 6 files changed, 86 insertions(+), 28 deletions(-) diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 2d4e6d3..3221ad4 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -910,6 +910,39 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path) return true; } } +//todo: only support docx, pptx, xlsx +bool FileUtils::isEncrypedOrUnreadable(QString path) +{ + QMimeType type = FileUtils::getMimetype(path); + QString name = type.name(); + QFileInfo file(path); + QString strsfx = file.suffix(); + if(name == "application/zip") { + if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") { + + return FileUtils::isOpenXMLFileEncrypted(path); + } else { + return true; + } + } else if(name == "text/plain") { + if(strsfx.endsWith("txt")) + return false; + return true; + } else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") { + if(strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" || + strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") { + return false; + } + return true; + } else if(name == "application/pdf") { + if(strsfx == "pdf") + return false; + return true; + } else { + qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]"; + return true; + } +} QString FileUtils::getHtmlText(const QString &text, const QString &keyword) { diff --git a/libsearch/file-utils.h b/libsearch/file-utils.h index 671a426..b7825df 100644 --- a/libsearch/file-utils.h +++ b/libsearch/file-utils.h @@ -97,6 +97,7 @@ public: static QString chineseSubString(const std::string &myStr,int start,int length); static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault); static bool isOpenXMLFileEncrypted(QString &path); + static bool isEncrypedOrUnreadable(QString path); static size_t _max_index_count; static size_t _current_index_count; //this one has been Abandoned,do not use it. static unsigned short _index_status; diff --git a/libsearch/index/file-reader.cpp b/libsearch/index/file-reader.cpp index e409374..4ac13f8 100644 --- a/libsearch/index/file-reader.cpp +++ b/libsearch/index/file-reader.cpp @@ -24,34 +24,23 @@ using namespace Zeeker; FileReader::FileReader(QObject *parent) : QObject(parent) { } - void FileReader::getTextContent(QString path, QString &textContent) { - QMimeType type = FileUtils::getMimetype(path); - QString name = type.name(); QFileInfo file(path); QString strsfx = file.suffix(); - if(name == "application/zip") { - if(strsfx.endsWith("docx")) - FileUtils::getDocxTextContent(path, textContent); - if(strsfx.endsWith("pptx")) - FileUtils::getPptxTextContent(path, textContent); - if(strsfx.endsWith("xlsx")) - FileUtils::getXlsxTextContent(path, textContent); - } else if(name == "text/plain") { - if(strsfx.endsWith("txt")) - FileUtils::getTxtContent(path, textContent); - } else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") { - if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") || - strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) { - KBinaryParser searchdata; - searchdata.RunParser(path, textContent); - } - } else if(name == "application/pdf") { - if(strsfx.endsWith("pdf")) - FileUtils::getPdfTextContent(path, textContent); - } else { - qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]"; + if (strsfx == "docx") { + FileUtils::getDocxTextContent(path, textContent); + } else if (strsfx == "pptx") { + FileUtils::getPptxTextContent(path, textContent); + } else if (strsfx == "xlsx") { + FileUtils::getXlsxTextContent(path, textContent); + } else if (strsfx == "txt") { + FileUtils::getTxtContent(path, textContent); + } else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" || + strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") { + KBinaryParser searchdata; + searchdata.RunParser(path, textContent); + } else if (strsfx == "pdf") { + FileUtils::getPdfTextContent(path, textContent); } - return; } diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp index 59a44b1..52c3258 100644 --- a/libsearch/index/first-index.cpp +++ b/libsearch/index/first-index.cpp @@ -47,7 +47,9 @@ FirstIndex::~FirstIndex() { void FirstIndex::DoSomething(const QFileInfo& fileInfo) { // qDebug() << "there are some shit here"<q_index->enqueue(QVector() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")); - if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) { + if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) + && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()]) + && (!FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath()))) { //this->q_content_index->enqueue(fileInfo.absoluteFilePath()); if (fileInfo.fileName().split(".").last() == "docx") { QuaZip file(fileInfo.absoluteFilePath()); diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index c846873..b0805da 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -486,11 +486,34 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) { return true; } +bool IndexGenerator::deleteContentIndex(QStringList *pathlist) +{ + if(pathlist->isEmpty()) + return true; + try { + qDebug() << "--delete start--"; + for(int i = 0; i < pathlist->size(); i++) { + QString doc = pathlist->at(i); + std::string uniqueterm = FileUtils::makeDocUterm(doc); + m_database_content->delete_document(uniqueterm); + qDebug() << "delete path" << doc; + } + m_database_content->commit(); + qDebug() << "--delete finish--"; + } catch(const Xapian::Error &e) { + qWarning() << QString::fromStdString(e.get_description()); + return false; + } + return true; +} + bool IndexGenerator::updateIndex(QVector *pendingFiles) { + QQueue> *fileIndexInfo = new QQueue>; QQueue *fileContentIndexInfo = new QQueue; QStringList *deleteList = new QStringList; + QStringList *contentDeleteList = new QStringList; for(PendingFile file : *pendingFiles) { if(file.shouldRemoveIndex()) { @@ -498,12 +521,21 @@ bool IndexGenerator::updateIndex(QVector *pendingFiles) continue; } fileIndexInfo->append(QVector() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0")); - if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) - fileContentIndexInfo->append(file.path()); + if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) { + if(!FileUtils::isEncrypedOrUnreadable(file.path())) { + fileContentIndexInfo->append(file.path()); + } else { + contentDeleteList->append(file.path()); + } + } + } if(!deleteList->isEmpty()) { deleteAllIndex(deleteList); } + if(!contentDeleteList->isEmpty()) { + deleteContentIndex(contentDeleteList); + } if(!fileIndexInfo->isEmpty()) { creatAllIndex(fileIndexInfo); } diff --git a/libsearch/index/index-generator.h b/libsearch/index/index-generator.h index 873a6cd..3d66b84 100644 --- a/libsearch/index/index-generator.h +++ b/libsearch/index/index-generator.h @@ -62,6 +62,7 @@ public Q_SLOTS: bool creatAllIndex(QQueue> *messageList); bool creatAllIndex(QQueue *messageList); bool deleteAllIndex(QStringList *pathlist); + bool deleteContentIndex(QStringList *pathlist); bool updateIndex(QVector *pendingFiles); private: