Add processing logic for encrypted files.

This commit is contained in:
iaom 2021-10-27 15:16:43 +08:00
parent b3c2ed7fec
commit 25bae31cff
6 changed files with 86 additions and 28 deletions

View File

@ -910,6 +910,39 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path)
return true;
}
}
/**
 * @brief Decide whether a file has to be skipped by content indexing.
 *
 * The detected MIME type is cross-checked against the file suffix. A file is
 * reported as readable only when its suffix is one of the suffixes accepted for
 * that MIME type; every other combination is reported as unreadable.
 *
 * @param path Path of the file to inspect.
 * @return false when the content is considered readable; true when the file is
 *         encrypted, its suffix does not match the detected MIME type, or the
 *         MIME type is not handled here.
 *
 * TODO: encryption is only really detected for docx, pptx and xlsx; the other
 *       formats are accepted on a MIME type / suffix match alone.
 */
bool FileUtils::isEncrypedOrUnreadable(QString path)
{
    const QMimeType type = FileUtils::getMimetype(path);
    const QString name = type.name();
    const QString strsfx = QFileInfo(path).suffix();

    if (name == "application/zip") {
        // Only the OpenXML containers are inspected; any other zip is rejected.
        if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") {
            return FileUtils::isOpenXMLFileEncrypted(path);
        }
        return true;
    }

    if (name == "text/plain") {
        // Exact match, like every other branch: endsWith("txt") also accepted
        // unrelated suffixes that merely end in "txt".
        return strsfx != "txt";
    }

    if (type.inherits("application/msword") || name == "application/x-ole-storage") {
        const bool knownBinarySuffix =
            strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
            strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls";
        return !knownBinarySuffix;
    }

    if (name == "application/pdf") {
        return strsfx != "pdf";
    }

    qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
    return true;
}
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{

View File

@ -97,6 +97,7 @@ public:
static QString chineseSubString(const std::string &myStr,int start,int length);
static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault);
static bool isOpenXMLFileEncrypted(QString &path);
// Returns true when the file at `path` is encrypted (checked for docx/pptx/xlsx only),
// when its suffix does not match its detected MIME type, or when the MIME type is not
// handled; returns false when the content is considered readable.
static bool isEncrypedOrUnreadable(QString path);
static size_t _max_index_count;
static size_t _current_index_count; // This one has been abandoned; do not use it.
static unsigned short _index_status;

View File

@ -24,34 +24,23 @@ using namespace Zeeker;
// Constructs a FileReader and hands `parent` to the QObject base class.
FileReader::FileReader(QObject *parent)
    : QObject(parent)
{
}
// Extracts the text of the file at `path` into `textContent` by dispatching to a
// format-specific reader (docx, pptx, xlsx, txt, the binary office formats handled
// by KBinaryParser, and pdf); other inputs only produce a warning.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the old
// MIME-type/endsWith() checks and the new exact-suffix checks appear interleaved.
// It is not compilable as shown — confirm the surviving lines against the real file.
void FileReader::getTextContent(QString path, QString &textContent) {
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
QFileInfo file(path);
QString strsfx = file.suffix();
if(name == "application/zip") {
if(strsfx.endsWith("docx"))
if (strsfx == "docx") {
FileUtils::getDocxTextContent(path, textContent);
if(strsfx.endsWith("pptx"))
} else if (strsfx == "pptx") {
FileUtils::getPptxTextContent(path, textContent);
if(strsfx.endsWith("xlsx"))
} else if (strsfx == "xlsx") {
FileUtils::getXlsxTextContent(path, textContent);
} else if(name == "text/plain") {
if(strsfx.endsWith("txt"))
} else if (strsfx == "txt") {
FileUtils::getTxtContent(path, textContent);
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) {
} else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
KBinaryParser searchdata;
searchdata.RunParser(path, textContent);
}
} else if(name == "application/pdf") {
if(strsfx.endsWith("pdf"))
} else if (strsfx == "pdf") {
FileUtils::getPdfTextContent(path, textContent);
} else {
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
}
return;
}

View File

@ -47,7 +47,9 @@ FirstIndex::~FirstIndex() {
void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1)
&& (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])
&& (!FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath()))) {
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
if (fileInfo.fileName().split(".").last() == "docx") {
QuaZip file(fileInfo.absoluteFilePath());

View File

@ -486,11 +486,34 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
return true;
}
/**
 * @brief Remove the documents for the given paths from the content database.
 *
 * Each path is turned into its unique term via FileUtils::makeDocUterm() and the
 * matching document is deleted from m_database_content; the deletions are
 * committed once after the whole list has been processed.
 *
 * @param pathlist Paths whose content index entries should be removed.
 * @return true when the list is empty or every deletion and the commit succeeded;
 *         false when Xapian reported an error (its description is logged).
 */
bool IndexGenerator::deleteContentIndex(QStringList *pathlist)
{
    const QStringList &targets = *pathlist;
    if (targets.isEmpty()) {
        return true;
    }

    bool succeeded = true;
    try {
        qDebug() << "--delete start--";
        // Iterate by value so the helper receives its own QString, as before.
        for (QString target : targets) {
            const std::string docTerm = FileUtils::makeDocUterm(target);
            m_database_content->delete_document(docTerm);
            qDebug() << "delete path" << target;
        }
        m_database_content->commit();
        qDebug() << "--delete finish--";
    } catch (const Xapian::Error &err) {
        qWarning() << QString::fromStdString(err.get_description());
        succeeded = false;
    }
    return succeeded;
}
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
{
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
QStringList *deleteList = new QStringList;
QStringList *contentDeleteList = new QStringList;
for(PendingFile file : *pendingFiles) {
if(file.shouldRemoveIndex()) {
@ -498,12 +521,21 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
continue;
}
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()]))
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) {
if(!FileUtils::isEncrypedOrUnreadable(file.path())) {
fileContentIndexInfo->append(file.path());
} else {
contentDeleteList->append(file.path());
}
}
}
if(!deleteList->isEmpty()) {
deleteAllIndex(deleteList);
}
if(!contentDeleteList->isEmpty()) {
deleteContentIndex(contentDeleteList);
}
if(!fileIndexInfo->isEmpty()) {
creatAllIndex(fileIndexInfo);
}

View File

@ -62,6 +62,7 @@ public Q_SLOTS:
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
bool creatAllIndex(QQueue<QString> *messageList);
bool deleteAllIndex(QStringList *pathlist);
// Deletes the documents for the given paths from the content database only and
// commits once at the end; returns false if Xapian reports an error.
bool deleteContentIndex(QStringList *pathlist);
bool updateIndex(QVector<PendingFile> *pendingFiles);
private: