forked from openkylin/ukui-search
Add processing logic for encrypted files.
This commit is contained in:
parent
b3c2ed7fec
commit
25bae31cff
|
@ -910,6 +910,39 @@ bool FileUtils::isOpenXMLFileEncrypted(QString &path)
|
|||
return true;
|
||||
}
|
||||
}
|
||||
// TODO: encryption detection currently supports only docx, pptx and xlsx files.
|
||||
/**
 * @brief Tell whether a file should be treated as encrypted or unreadable.
 *
 * The decision combines the MIME type reported by FileUtils::getMimetype()
 * with the file-name suffix reported by QFileInfo::suffix():
 *  - "application/zip": docx/pptx/xlsx are handed to
 *    FileUtils::isOpenXMLFileEncrypted(); every other suffix is rejected.
 *  - "text/plain": accepted only for the suffix "txt".
 *  - types inheriting "application/msword", or "application/x-ole-storage":
 *    accepted for doc, dot, wps, ppt, pps, dps, et and xls. No encryption
 *    check is performed here for these formats.
 *  - "application/pdf": accepted only for the suffix "pdf". No encryption
 *    check is performed here for this format.
 *  - anything else: rejected, and a warning is logged.
 *
 * All suffix comparisons are exact and case-sensitive.
 *
 * @param path Path of the file to inspect.
 * @return false when the file is accepted as readable; true when it is
 *         rejected (or, for OOXML files, whatever
 *         isOpenXMLFileEncrypted() returns).
 */
bool FileUtils::isEncrypedOrUnreadable(QString path)
{
    const QMimeType type = FileUtils::getMimetype(path);
    const QString name = type.name();
    const QString strsfx = QFileInfo(path).suffix();

    if (name == "application/zip") {
        // Only OOXML containers are inspected; any other zip archive is rejected.
        if (strsfx == "docx" || strsfx == "pptx" || strsfx == "xlsx") {
            return FileUtils::isOpenXMLFileEncrypted(path);
        }
        return true;
    }

    if (name == "text/plain") {
        // Exact match, like every other branch: endsWith("txt") would also
        // accept unrelated suffixes such as "mytxt".
        return strsfx != "txt";
    }

    if (type.inherits("application/msword") || name == "application/x-ole-storage") {
        const bool legacyOfficeSuffix =
            strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
            strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls";
        return !legacyOfficeSuffix;
    }

    if (name == "application/pdf") {
        return strsfx != "pdf";
    }

    qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
    return true;
}
|
||||
|
||||
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
|
||||
{
|
||||
|
|
|
@ -97,6 +97,7 @@ public:
|
|||
static QString chineseSubString(const std::string &myStr,int start,int length);
|
||||
static QIcon iconFromTheme(const QString& name, const QIcon &iconDefault);
|
||||
static bool isOpenXMLFileEncrypted(QString &path);
|
||||
static bool isEncrypedOrUnreadable(QString path);
|
||||
static size_t _max_index_count;
|
||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||
static unsigned short _index_status;
|
||||
|
|
|
@ -24,34 +24,23 @@ using namespace Zeeker;
|
|||
FileReader::FileReader(QObject *parent) : QObject(parent) {
|
||||
|
||||
}
|
||||
|
||||
void FileReader::getTextContent(QString path, QString &textContent) {
|
||||
QMimeType type = FileUtils::getMimetype(path);
|
||||
QString name = type.name();
|
||||
QFileInfo file(path);
|
||||
QString strsfx = file.suffix();
|
||||
if(name == "application/zip") {
|
||||
if(strsfx.endsWith("docx"))
|
||||
FileUtils::getDocxTextContent(path, textContent);
|
||||
if(strsfx.endsWith("pptx"))
|
||||
FileUtils::getPptxTextContent(path, textContent);
|
||||
if(strsfx.endsWith("xlsx"))
|
||||
FileUtils::getXlsxTextContent(path, textContent);
|
||||
} else if(name == "text/plain") {
|
||||
if(strsfx.endsWith("txt"))
|
||||
FileUtils::getTxtContent(path, textContent);
|
||||
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
|
||||
if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
|
||||
strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) {
|
||||
KBinaryParser searchdata;
|
||||
searchdata.RunParser(path, textContent);
|
||||
}
|
||||
} else if(name == "application/pdf") {
|
||||
if(strsfx.endsWith("pdf"))
|
||||
FileUtils::getPdfTextContent(path, textContent);
|
||||
} else {
|
||||
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
|
||||
if (strsfx == "docx") {
|
||||
FileUtils::getDocxTextContent(path, textContent);
|
||||
} else if (strsfx == "pptx") {
|
||||
FileUtils::getPptxTextContent(path, textContent);
|
||||
} else if (strsfx == "xlsx") {
|
||||
FileUtils::getXlsxTextContent(path, textContent);
|
||||
} else if (strsfx == "txt") {
|
||||
FileUtils::getTxtContent(path, textContent);
|
||||
} else if (strsfx == "doc" || strsfx == "dot" || strsfx == "wps" || strsfx == "ppt" ||
|
||||
strsfx == "pps" || strsfx == "dps" || strsfx == "et" || strsfx == "xls") {
|
||||
KBinaryParser searchdata;
|
||||
searchdata.RunParser(path, textContent);
|
||||
} else if (strsfx == "pdf") {
|
||||
FileUtils::getPdfTextContent(path, textContent);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -47,7 +47,9 @@ FirstIndex::~FirstIndex() {
|
|||
void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
||||
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
||||
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
|
||||
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1)
|
||||
&& (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])
|
||||
&& (!FileUtils::isEncrypedOrUnreadable(fileInfo.absoluteFilePath()))) {
|
||||
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
||||
if (fileInfo.fileName().split(".").last() == "docx") {
|
||||
QuaZip file(fileInfo.absoluteFilePath());
|
||||
|
|
|
@ -486,11 +486,34 @@ bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * @brief Remove the content-index documents belonging to the given paths.
 *
 * For each path a unique term is built with FileUtils::makeDocUterm() and
 * the matching document is deleted from m_database_content; the database is
 * committed once after all deletions.
 *
 * @param pathlist Paths whose content-index entries are to be removed.
 * @return true when the list is empty or every deletion and the commit
 *         completed; false when a Xapian::Error was caught (its description
 *         is logged as a warning).
 */
bool IndexGenerator::deleteContentIndex(QStringList *pathlist)
{
    // Nothing to delete: report success without touching the database.
    if (pathlist->isEmpty()) {
        return true;
    }

    const QStringList &paths = *pathlist;
    try {
        qDebug() << "--delete start--";
        for (const QString &docPath : paths) {
            const std::string term = FileUtils::makeDocUterm(docPath);
            m_database_content->delete_document(term);
            qDebug() << "delete path" << docPath;
        }
        // One commit for the whole batch.
        m_database_content->commit();
        qDebug() << "--delete finish--";
    } catch (const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }

    return true;
}
|
||||
|
||||
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
||||
{
|
||||
|
||||
QQueue<QVector<QString>> *fileIndexInfo = new QQueue<QVector<QString>>;
|
||||
QQueue<QString> *fileContentIndexInfo = new QQueue<QString>;
|
||||
QStringList *deleteList = new QStringList;
|
||||
QStringList *contentDeleteList = new QStringList;
|
||||
for(PendingFile file : *pendingFiles) {
|
||||
if(file.shouldRemoveIndex()) {
|
||||
|
||||
|
@ -498,12 +521,21 @@ bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
|
|||
continue;
|
||||
}
|
||||
fileIndexInfo->append(QVector<QString>() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0"));
|
||||
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()]))
|
||||
fileContentIndexInfo->append(file.path());
|
||||
if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) {
|
||||
if(!FileUtils::isEncrypedOrUnreadable(file.path())) {
|
||||
fileContentIndexInfo->append(file.path());
|
||||
} else {
|
||||
contentDeleteList->append(file.path());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if(!deleteList->isEmpty()) {
|
||||
deleteAllIndex(deleteList);
|
||||
}
|
||||
if(!contentDeleteList->isEmpty()) {
|
||||
deleteContentIndex(contentDeleteList);
|
||||
}
|
||||
if(!fileIndexInfo->isEmpty()) {
|
||||
creatAllIndex(fileIndexInfo);
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@ public Q_SLOTS:
|
|||
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
|
||||
bool creatAllIndex(QQueue<QString> *messageList);
|
||||
bool deleteAllIndex(QStringList *pathlist);
|
||||
bool deleteContentIndex(QStringList *pathlist);
|
||||
bool updateIndex(QVector<PendingFile> *pendingFiles);
|
||||
|
||||
private:
|
||||
|
|
Loading…
Reference in New Issue