Add processing logic for encrypted files.

This commit is contained in:
iaom 2021-10-27 15:16:43 +08:00
parent b3c2ed7fec
commit 25bae31cff
6 changed files with 86 additions and 28 deletions

View File

@ -910,6 +910,39 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path)
return true; return true;
} }
} }
/**
 * @brief Report whether a file should be treated as encrypted or unreadable.
 *
 * The decision combines the MIME type returned by FileUtils::getMimetype()
 * with the file suffix (the part after the last '.'):
 *   - application/zip: docx/pptx/xlsx are checked with
 *     isOpenXMLFileEncrypted(); any other suffix yields true.
 *   - text/plain: only the exact suffix "txt" yields false.
 *   - application/msword (or a subtype) / application/x-ole-storage:
 *     doc, dot, wps, ppt, pps, dps, et and xls yield false.
 *   - application/pdf: only the suffix "pdf" yields false.
 *   - anything else: a warning is logged and true is returned.
 *
 * Suffix comparison is exact and case-sensitive.
 *
 * TODO: real encryption detection is only implemented for docx, pptx and
 * xlsx; the other accepted formats are reported as readable without being
 * inspected.
 *
 * @param path Path of the file to examine.
 * @return true if the file is encrypted or is not a supported
 *         MIME-type/suffix combination, false otherwise.
 */
bool FileUtils::isEncrypedOrUnreadable(QString path)
{
    const QMimeType type = FileUtils::getMimetype(path);
    const QString name = type.name();
    const QFileInfo file(path);
    const QString strsfx = file.suffix();

    if (name == "application/zip") {
        // Office Open XML documents are zip containers; only these three
        // suffixes can actually be checked for encryption.
        if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") {
            return FileUtils::isOpenXMLFileEncrypted(path);
        }
        return true;
    }

    if (name == "text/plain") {
        // Exact match, consistent with the other branches: a suffix that
        // merely ends in "txt" (e.g. "mytxt") is not a supported text file.
        return strsfx != "txt";
    }

    if (type.inherits("application/msword") || name == "application/x-ole-storage") {
        const bool supportedBinarySuffix =
            strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
            strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls";
        return !supportedBinarySuffix;
    }

    if (name == "application/pdf") {
        return strsfx != "pdf";
    }

    qWarning() << "Unsupport format:[" << path << "][" << name << "]";
    return true;
}
QString FileUtils::getHtmlText(const QString &text, const QString &keyword) QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{ {

View File

@ -97,6 +97,7 @@ public:
static QString chineseSubString(const std::string &myStr,int start,int length); static QString chineseSubString(const std::string &myStr,int start,int length);
static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault); static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault);
static bool isOpenXMLFileEncrypted(QString &path); static bool isOpenXMLFileEncrypted(QString &path);
static bool isEncrypedOrUnreadable(QString path);
static size_t _max_index_count; static size_t _max_index_count;
static size_t _current_index_count; //this one has been Abandoned,do not use it. static size_t _current_index_count; //this one has been Abandoned,do not use it.
static unsigned short _index_status; static unsigned short _index_status;

View File

@ -24,34 +24,23 @@ using namespace Zeeker;
FileReader::FileReader(QObject *parent) : QObject(parent) { FileReader::FileReader(QObject *parent) : QObject(parent) {
} }
void FileReader::getTextContent(QString path, QString &textContent) { void FileReader::getTextContent(QString path, QString &textContent) {
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
QFileInfo file(path); QFileInfo file(path);
QString strsfx = file.suffix(); QString strsfx = file.suffix();
if(name == "application/zip") { if (strsfx == "docx") {
if(strsfx.endsWith("docx"))
FileUtils::getDocxTextContent(path, textContent); FileUtils::getDocxTextContent(path, textContent);
if(strsfx.endsWith("pptx")) } else if (strsfx == "pptx") {
FileUtils::getPptxTextContent(path, textContent); FileUtils::getPptxTextContent(path, textContent);
if(strsfx.endsWith("xlsx")) } else if (strsfx == "xlsx") {
FileUtils::getXlsxTextContent(path, textContent); FileUtils::getXlsxTextContent(path, textContent);
} else if(name == "text/plain") { } else if (strsfx == "txt") {
if(strsfx.endsWith("txt"))
FileUtils::getTxtContent(path, textContent); FileUtils::getTxtContent(path, textContent);
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") { } else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") || strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) {
KBinaryParser searchdata; KBinaryParser searchdata;
searchdata.RunParser(path, textContent); searchdata.RunParser(path, textContent);
} } else if (strsfx == "pdf") {
} else if(name == "application/pdf") {
if(strsfx.endsWith("pdf"))
FileUtils::getPdfTextContent(path, textContent); FileUtils::getPdfTextContent(path, textContent);
} else {
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
} }
return; return;
} }

View File

@ -47,7 +47,9 @@ FirstIndex::~FirstIndex() {
void FirstIndex::DoSomething(const QFileInfo& fileInfo) { void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"); // qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")); this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) { if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1)
&& (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])
&& (!FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath()))) {
//this->q_content_index->enqueue(fileInfo.absoluteFilePath()); //this->q_content_index->enqueue(fileInfo.absoluteFilePath());
if (fileInfo.fileName().split(".").last() == "docx") { if (fileInfo.fileName().split(".").last() == "docx") {
QuaZip file(fileInfo.absoluteFilePath()); QuaZip file(fileInfo.absoluteFilePath());

View File

@ -486,11 +486,34 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
return true; return true;
} }
/**
 * @brief Delete the content-index entries for the given paths.
 *
 * For every path a unique term is built with FileUtils::makeDocUterm() and
 * passed to delete_document() on m_database_content; the deletions are
 * committed once after the whole list has been processed.
 *
 * @param pathlist Paths whose content-index entries should be removed.
 *                 An empty list is a no-op that reports success.
 * @return true on success (or when there was nothing to do), false if a
 *         Xapian::Error was raised; the error description is logged.
 */
bool IndexGenerator::deleteContentIndex(QStringList *pathlist)
{
    // Bind as const so the range-for below does not detach the Qt container.
    const QStringList &paths = *pathlist;
    if (paths.isEmpty()) {
        return true;
    }

    try {
        qDebug() << "--delete start--";
        for (const QString &doc : paths) {
            const std::string uniqueterm = FileUtils::makeDocUterm(doc);
            m_database_content->delete_document(uniqueterm);
            qDebug() << "delete path" << doc;
        }
        // One commit for the whole batch.
        m_database_content->commit();
        qDebug() << "--delete finish--";
        return true;
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
    }
    return false;
}
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles) bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
{ {
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>; QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>; QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
QStringList *deleteList = new QStringList; QStringList *deleteList = new QStringList;
QStringList *contentDeleteList = new QStringList;
for(PendingFile file : *pendingFiles) { for(PendingFile file : *pendingFiles) {
if(file.shouldRemoveIndex()) { if(file.shouldRemoveIndex()) {
@ -498,12 +521,21 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
continue; continue;
} }
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0")); fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) {
if(!FileUtils::isEncrypedOrUnreadable(file.path())) {
fileContentIndexInfo->append(file.path()); fileContentIndexInfo->append(file.path());
} else {
contentDeleteList->append(file.path());
}
}
} }
if(!deleteList->isEmpty()) { if(!deleteList->isEmpty()) {
deleteAllIndex(deleteList); deleteAllIndex(deleteList);
} }
if(!contentDeleteList->isEmpty()) {
deleteContentIndex(contentDeleteList);
}
if(!fileIndexInfo->isEmpty()) { if(!fileIndexInfo->isEmpty()) {
creatAllIndex(fileIndexInfo); creatAllIndex(fileIndexInfo);
} }

View File

@ -62,6 +62,7 @@ public Q_SLOTS:
bool creatAllIndex(QQueue<QVector<QString>> *messageList); bool creatAllIndex(QQueue<QVector<QString>> *messageList);
bool creatAllIndex(QQueue<QString> *messageList); bool creatAllIndex(QQueue<QString> *messageList);
bool deleteAllIndex(QStringList *pathlist); bool deleteAllIndex(QStringList *pathlist);
bool deleteContentIndex(QStringList *pathlist);
bool updateIndex(QVector<PendingFile> *pendingFiles); bool updateIndex(QVector<PendingFile> *pendingFiles);
private: private: