Add support for 'doc' file in file content search.

This commit is contained in:
iaom 2021-03-02 15:19:10 +08:00
parent 87d70b4ae0
commit 4a8076122e
10 changed files with 5651 additions and 6 deletions

View File

@ -122,6 +122,8 @@ void ConstructDocumentForContent::run()
_doc_list_content = new QList<Document>; _doc_list_content = new QList<Document>;
QString content; QString content;
FileReader::getTextContent(m_path,content); FileReader::getTextContent(m_path,content);
if(content.isEmpty())
return;
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep))); QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));

View File

@ -19,6 +19,7 @@
*/ */
#include "file-reader.h" #include "file-reader.h"
#include "file-utils.h" #include "file-utils.h"
#include "binary-parser.h"
FileReader::FileReader(QObject *parent) : QObject(parent) FileReader::FileReader(QObject *parent) : QObject(parent)
{ {
@ -27,13 +28,27 @@ FileReader::FileReader(QObject *parent) : QObject(parent)
/**
 * @brief Extract the plain-text content of a file for content indexing.
 *
 * The file is first classified by the MIME type reported by
 * FileUtils::getMimetype(), then by its suffix, and handed to the matching
 * extractor. When no branch matches, textContent is left untouched.
 *
 * @param path         Path of the file to read.
 * @param textContent  Output string passed on to the selected extractor.
 */
void FileReader::getTextContent(QString path, QString &textContent)
{
    // Get the content of all supported files.
    // Classify first: MIME type, then file suffix.
    const QString type = FileUtils::getMimetype(path, true);
    const QString suffix = QFileInfo(path).suffix();

    // QFileInfo::suffix() already returns only the text after the last '.',
    // so compare it exactly; endsWith() would also accept unrelated
    // extensions such as "mytxt" or "xdoc".
    if (type == "application/zip")
    {
        if (suffix == "docx")
            FileUtils::getDocxTextContent(path, textContent);
    }
    else if (type == "text/plain")
    {
        if (suffix == "txt")
            FileUtils::getTxtContent(path, textContent);
    }
    else if (type == "application/x-ole-storage")
    {
        if (suffix == "doc")
        {
            KBinaryParser searchdata;
            searchdata.RunParser(path, textContent);
        }
    }
}

View File

@ -21,6 +21,7 @@
#define FILEREADER_H #define FILEREADER_H
#include <QObject> #include <QObject>
#include <QFileInfo>
class FileReader : public QObject class FileReader : public QObject
{ {

View File

@ -63,7 +63,7 @@ private:
QQueue<QVector<QString>>* q_index; QQueue<QVector<QString>>* q_index;
QQueue<QString>* q_content_index; QQueue<QString>* q_content_index;
const QVector<QString> targetFileTypeVec ={ const QVector<QString> targetFileTypeVec ={
// QString(".doc"), QString(".doc"),
QString(".docx"), QString(".docx"),
// QString(".ppt"), // QString(".ppt"),
// QString(".pptx"), // QString(".pptx"),

View File

@ -61,7 +61,7 @@ private:
QMap<int, QString> currentPath; QMap<int, QString> currentPath;
const QVector<QString> targetFileTypeVec ={ const QVector<QString> targetFileTypeVec ={
// QString(".doc"), QString(".doc"),
QString(".docx"), QString(".docx"),
// QString(".ppt"), // QString(".ppt"),
// QString(".pptx"), // QString(".pptx"),

View File

@ -22,6 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(index/index.pri) include(index/index.pri)
include(parser/parser.pri))
include(appsearch/appsearch.pri) include(appsearch/appsearch.pri)
include(settingsearch/settingsearch.pri)) include(settingsearch/settingsearch.pri))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,116 @@
#ifndef SEARCHHELPER_H
#define SEARCHHELPER_H
#include <QtCore>
#include <QtConcurrent/QtConcurrent>
/*
 * Fixed-name integer aliases used by the binary parser.
 * Real typedefs instead of object-like macros, so the names obey scoping and
 * can be used in functional casts such as ULONG(x).
 * NOTE(review): unsigned long is 64 bits wide on LP64 platforms - confirm the
 * parser never relies on ULONG being exactly 32 bits.
 */
typedef unsigned long ULONG;
typedef unsigned char UCHAR;
typedef unsigned short USHORT;
/* Document kinds; presumably the Office format found in the parsed file. */
enum TYPE
{
    Word  = 0,
    Excel = 1,
    Ppt   = 2
};
/*
 * Property Set Storage: one stream reference.
 * Field meanings are not spelled out in this header; by name, ulSB is
 * presumably the stream's start block and ulSize its size - TODO confirm.
 */
struct pps_tag
{
    ULONG ulSB;
    ULONG ulSize;
};
typedef struct pps_tag ppsTag;
/* Collection of the stream references looked up in a file, plus its TYPE. */
struct pps_info_tag
{
    ppsTag tWordDocument;   /* Text stream */
    ppsTag tWorkBook;       /* presumably the Excel workbook stream - TODO confirm */
    ppsTag tPPTDocument;    /* presumably the PowerPoint document stream - TODO confirm */
    ppsTag tData;           /* Data stream */
    ppsTag tTable;          /* Table stream */
    ppsTag tSummaryInfo;    /* Summary Information */
    ppsTag tDocSummaryInfo; /* Document Summary Information */
    ppsTag t0Table;         /* Table 0 stream */
    ppsTag t1Table;         /* Table 1 stream */
    ppsTag tCurrentUser;
    TYPE type;
};
typedef struct pps_info_tag ppsInfoType;
/*
 * Private type for Property Set Storage entries.
 * By name, ulNext / ulPrevious / ulDir presumably link an entry to its
 * neighbours and children - TODO confirm against binary-parser.cpp.
 */
struct pps_entry_tag
{
    ULONG ulNext;
    ULONG ulPrevious;
    ULONG ulDir;
    ULONG ulSB;
    ULONG ulSize;
    int iLevel;
    char szName[32];
    UCHAR ucType;
};
typedef struct pps_entry_tag ppsEntryType;
/* Excel Record Struct*/
//typedef struct excelRecord
//{
// excelRecord()
// {
// usLen = 0;
// usRichLen = 0;
// ulWLen = 0;
// bUni = false;
// }
// ushort usLen;
// ushort usRichLen;
// ulong ulWLen;
// bool bUni;
//} excelRecord;
/*
 * Parameter bundle taken by KBinaryParser::readData().
 * A default-constructed instance has every counter zeroed and both pointers null.
 * Field meanings are inferred from their names - TODO confirm.
 */
struct readDataParam
{
    readDataParam()
        : ulStBlk(0),
          pFile(nullptr),
          ulBBd(nullptr),
          tBBdLen(0),
          usBlkSize(0)
    {
    }

    ulong ulStBlk;    /* presumably the start block */
    FILE *pFile;      /* file handle to read from */
    ulong *ulBBd;     /* presumably the big block depot array */
    size_t tBBdLen;   /* presumably the number of entries in ulBBd */
    ushort usBlkSize; /* presumably the block size */
};
typedef struct readDataParam rdPara;
/*
 * QObject-based parser for binary document files.
 * FileReader::getTextContent() creates one on the stack and calls RunParser()
 * for files detected as "application/x-ole-storage" with a doc suffix.
 * The member definitions live outside this header.
 */
class KBinaryParser :public QObject
{
Q_OBJECT
public:
KBinaryParser(QObject *parent=0);
~KBinaryParser();
public:
/*
 * Entry point: parse the file at strFile and deliver its text through content.
 * The meaning of the bool result is not visible here; presumably true on
 * success - TODO confirm.
 */
bool RunParser(QString strFile,QString &content);
private:
/* Presumably fills *pPPS with the property-set entries found via the root list - TODO confirm. */
bool bGetPPS(FILE *pFile,
const ULONG *aulRootList, size_t tRootListLen, ppsInfoType *pPPS);
/* Presumably reads tToRead bytes at ulOffset into aucBuffer using the settings in readParam - TODO confirm. */
int readData(rdPara &readParam, uchar *aucBuffer, ulong ulOffset, size_t tToRead);
/* Presumably parses the OLE container of a file of size lFilesize and appends its text to content - TODO confirm. */
int InitDocOle(FILE *pFile,long lFilesize,QString &content);
/* Presumably extracts the text of a Word document stream into content - TODO confirm. */
bool read8DocText(FILE *pFile, const ppsInfoType *pPPS,
const ULONG *aulBBD, size_t tBBDLen,
const ULONG *aulSBD, size_t tSBDLen,
const UCHAR *aucHeader,QString &content);
/* The declarations below are commented out (Excel / PowerPoint helpers that are not enabled). */
// int readSSTRecord(readDataParam &rdParam, ppsInfoType, ulong &ulOff, ushort usPartLen);
// int read8BiffRecord(uchar uFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR);
// ULONG readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD,
// size_t tBBDLen, ULONG ulPos);
/* Presumably the path of the file being parsed - TODO confirm it is actually used. */
QString m_strFileName;
};
#endif // SEARCHHELPER_H

25
libsearch/parser/common.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef COMMON_H
#define COMMON_H
#include <QtCore>
#include <QtConcurrent/QtConcurrent>
// String constant; its consumer is not visible in this header.
#define SERVER "Everything"
// Logging hook, currently disabled: the trailing backslash splices the next
// line onto the #define, and that line is a // comment, so LOG(a) expands to
// nothing.
#define LOG(a) \
//qWarning() << a;
// Rolling-hash update step. It is unhygienic: it reads and writes the
// variables sl_minus_1 and hashHaystack that must exist at the expansion
// site. It expands to an if-statement followed by a shift expression with no
// trailing semicolon, so it must be used as a full statement inside braces.
#define REHASH(a) \
if (sl_minus_1 < (int)sizeof(int) * CHAR_BIT) \
hashHaystack -= (a) << sl_minus_1; \
hashHaystack <<= 1
// Allocation helpers; only the declarations are visible here.
// Presumably wrappers around malloc / calloc / realloc / free - TODO confirm
// their failure behaviour against the definitions.
void* xmalloc(size_t tSize);
void* xcalloc(size_t tNmemb, size_t tSize);
void* xrealloc(void *pvArg, size_t tSize);
// NOTE(review): unlike free(), this returns a pointer; presumably NULL so the
// caller can reset its variable in one statement - confirm.
void* xfree(void *pvArg);
#endif // COMMON_H

View File

@ -0,0 +1,9 @@
# qmake include for the parser sources: adds this directory to the include
# path and registers the parser headers and source with the including project.
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/common.h \
$$PWD/binary-parser.h
SOURCES += \
$$PWD/binary-parser.cpp