Add processing logic for encrypted files.
This commit is contained in:
parent
b3c2ed7fec
commit
25bae31cff
|
@ -910,6 +910,39 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//todo: only support docx, pptx, xlsx
|
||||||
|
bool FileUtils::isEncrypedOrUnreadable(QString path)
|
||||||
|
{
|
||||||
|
QMimeType type = FileUtils::getMimetype(path);
|
||||||
|
QString name = type.name();
|
||||||
|
QFileInfo file(path);
|
||||||
|
QString strsfx = file.suffix();
|
||||||
|
if(name == "application/zip") {
|
||||||
|
if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") {
|
||||||
|
|
||||||
|
return FileUtils::isOpenXMLFileEncrypted(path);
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if(name == "text/plain") {
|
||||||
|
if(strsfx.endsWith("txt"))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
|
||||||
|
if(strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
|
||||||
|
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else if(name == "application/pdf") {
|
||||||
|
if(strsfx == "pdf")
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
|
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
|
||||||
{
|
{
|
||||||
|
|
|
@ -97,6 +97,7 @@ public:
|
||||||
static QString chineseSubString(const std::string &myStr,int start,int length);
|
static QString chineseSubString(const std::string &myStr,int start,int length);
|
||||||
static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault);
|
static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault);
|
||||||
static bool isOpenXMLFileEncrypted(QString &path);
|
static bool isOpenXMLFileEncrypted(QString &path);
|
||||||
|
static bool isEncrypedOrUnreadable(QString path);
|
||||||
static size_t _max_index_count;
|
static size_t _max_index_count;
|
||||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||||
static unsigned short _index_status;
|
static unsigned short _index_status;
|
||||||
|
|
|
@ -24,34 +24,23 @@ using namespace Zeeker;
|
||||||
FileReader::FileReader(QObject *parent) : QObject(parent) {
|
FileReader::FileReader(QObject *parent) : QObject(parent) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileReader::getTextContent(QString path, QString &textContent) {
|
void FileReader::getTextContent(QString path, QString &textContent) {
|
||||||
QMimeType type = FileUtils::getMimetype(path);
|
|
||||||
QString name = type.name();
|
|
||||||
QFileInfo file(path);
|
QFileInfo file(path);
|
||||||
QString strsfx = file.suffix();
|
QString strsfx = file.suffix();
|
||||||
if(name == "application/zip") {
|
if (strsfx == "docx") {
|
||||||
if(strsfx.endsWith("docx"))
|
|
||||||
FileUtils::getDocxTextContent(path, textContent);
|
FileUtils::getDocxTextContent(path, textContent);
|
||||||
if(strsfx.endsWith("pptx"))
|
} else if (strsfx == "pptx") {
|
||||||
FileUtils::getPptxTextContent(path, textContent);
|
FileUtils::getPptxTextContent(path, textContent);
|
||||||
if(strsfx.endsWith("xlsx"))
|
} else if (strsfx == "xlsx") {
|
||||||
FileUtils::getXlsxTextContent(path, textContent);
|
FileUtils::getXlsxTextContent(path, textContent);
|
||||||
} else if(name == "text/plain") {
|
} else if (strsfx == "txt") {
|
||||||
if(strsfx.endsWith("txt"))
|
|
||||||
FileUtils::getTxtContent(path, textContent);
|
FileUtils::getTxtContent(path, textContent);
|
||||||
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
|
} else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
|
||||||
if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
|
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
|
||||||
strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) {
|
|
||||||
KBinaryParser searchdata;
|
KBinaryParser searchdata;
|
||||||
searchdata.RunParser(path, textContent);
|
searchdata.RunParser(path, textContent);
|
||||||
}
|
} else if (strsfx == "pdf") {
|
||||||
} else if(name == "application/pdf") {
|
|
||||||
if(strsfx.endsWith("pdf"))
|
|
||||||
FileUtils::getPdfTextContent(path, textContent);
|
FileUtils::getPdfTextContent(path, textContent);
|
||||||
} else {
|
|
||||||
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,9 @@ FirstIndex::~FirstIndex() {
|
||||||
void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
||||||
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
||||||
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
|
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1)
|
||||||
|
&& (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])
|
||||||
|
&& (!FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath()))) {
|
||||||
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
||||||
if (fileInfo.fileName().split(".").last() == "docx") {
|
if (fileInfo.fileName().split(".").last() == "docx") {
|
||||||
QuaZip file(fileInfo.absoluteFilePath());
|
QuaZip file(fileInfo.absoluteFilePath());
|
||||||
|
|
|
@ -486,11 +486,34 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Deletes the content-index documents for every path in @p pathlist.
 *
 * For each path a unique term is built with FileUtils::makeDocUterm() and the
 * matching document is deleted from m_database_content; the database is
 * committed once after all paths have been processed.
 *
 * @param pathlist list of file paths whose content index entries are removed.
 * @return true if the list is empty or all deletions and the commit completed;
 *         false if a Xapian::Error was caught (its description is logged).
 */
bool IndexGenerator::deleteContentIndex(QStringList *pathlist)
{
    // Nothing to delete: report success without touching the database.
    if (pathlist->isEmpty()) {
        return true;
    }

    // Iterate through a const view of the caller's list.
    const QStringList &paths = *pathlist;

    try {
        qDebug() << "--delete start--";

        for (QString docPath : paths) {
            const std::string term = FileUtils::makeDocUterm(docPath);
            m_database_content->delete_document(term);
            qDebug() << "delete path" << docPath;
        }

        m_database_content->commit();
        qDebug() << "--delete finish--";
        return true;
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }
}
|
||||||
|
|
||||||
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
||||||
{
|
{
|
||||||
|
|
||||||
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
|
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
|
||||||
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
|
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
|
||||||
QStringList *deleteList = new QStringList;
|
QStringList *deleteList = new QStringList;
|
||||||
|
QStringList *contentDeleteList = new QStringList;
|
||||||
for(PendingFile file : *pendingFiles) {
|
for(PendingFile file : *pendingFiles) {
|
||||||
if(file.shouldRemoveIndex()) {
|
if(file.shouldRemoveIndex()) {
|
||||||
|
|
||||||
|
@ -498,12 +521,21 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
|
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
|
||||||
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()]))
|
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) {
|
||||||
|
if(!FileUtils::isEncrypedOrUnreadable(file.path())) {
|
||||||
fileContentIndexInfo->append(file.path());
|
fileContentIndexInfo->append(file.path());
|
||||||
|
} else {
|
||||||
|
contentDeleteList->append(file.path());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if(!deleteList->isEmpty()) {
|
if(!deleteList->isEmpty()) {
|
||||||
deleteAllIndex(deleteList);
|
deleteAllIndex(deleteList);
|
||||||
}
|
}
|
||||||
|
if(!contentDeleteList->isEmpty()) {
|
||||||
|
deleteContentIndex(contentDeleteList);
|
||||||
|
}
|
||||||
if(!fileIndexInfo->isEmpty()) {
|
if(!fileIndexInfo->isEmpty()) {
|
||||||
creatAllIndex(fileIndexInfo);
|
creatAllIndex(fileIndexInfo);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,6 +62,7 @@ public Q_SLOTS:
|
||||||
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
|
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
|
||||||
bool creatAllIndex(QQueue<QString> *messageList);
|
bool creatAllIndex(QQueue<QString> *messageList);
|
||||||
bool deleteAllIndex(QStringList *pathlist);
|
bool deleteAllIndex(QStringList *pathlist);
|
||||||
|
bool deleteContentIndex(QStringList *pathlist);
|
||||||
bool updateIndex(QVector<PendingFile> *pendingFiles);
|
bool updateIndex(QVector<PendingFile> *pendingFiles);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Reference in New Issue