Add support for 'doc' files in file content search.
This commit is contained in:
parent
87d70b4ae0
commit
4a8076122e
|
@ -122,6 +122,8 @@ void ConstructDocumentForContent::run()
|
||||||
_doc_list_content = new QList<Document>;
|
_doc_list_content = new QList<Document>;
|
||||||
QString content;
|
QString content;
|
||||||
FileReader::getTextContent(m_path,content);
|
FileReader::getTextContent(m_path,content);
|
||||||
|
if(content.isEmpty())
|
||||||
|
return;
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
*/
|
*/
|
||||||
#include "file-reader.h"
|
#include "file-reader.h"
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
|
#include "binary-parser.h"
|
||||||
|
|
||||||
FileReader::FileReader(QObject *parent) : QObject(parent)
|
FileReader::FileReader(QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
|
@ -27,13 +28,27 @@ FileReader::FileReader(QObject *parent) : QObject(parent)
|
||||||
|
|
||||||
// Extracts the text of the file at `path` into `textContent`.
// The MIME type comes from FileUtils::getMimetype(path, true) and selects the
// extractor. In the updated version each branch also checks the file suffix
// taken from QFileInfo:
//   "application/zip"           + suffix ending in "docx" -> FileUtils::getDocxTextContent
//   "text/plain"                + suffix ending in "txt"  -> FileUtils::getTxtContent
//   "application/x-ole-storage" + suffix ending in "doc"  -> KBinaryParser::RunParser
// For any other combination this function does not touch `textContent`.
// NOTE(review): endsWith() accepts any suffix that merely ends in these letters
// and is case-sensitive by default - confirm that is the intended match.
void FileReader::getTextContent(QString path, QString &textContent)
|
void FileReader::getTextContent(QString path, QString &textContent)
|
||||||
{
|
{
|
||||||
// Get the content of all files
|
|
||||||
// Classify by type first
|
|
||||||
QString type =FileUtils::getMimetype(path,true);
|
QString type =FileUtils::getMimetype(path,true);
|
||||||
|
QFileInfo file(path);
|
||||||
|
QString strsfx = file.suffix();
|
||||||
if(type == "application/zip")
|
if(type == "application/zip")
|
||||||
|
{
|
||||||
|
if(strsfx.endsWith( "docx"))
|
||||||
FileUtils::getDocxTextContent(path,textContent);
|
FileUtils::getDocxTextContent(path,textContent);
|
||||||
|
}
|
||||||
else if(type == "text/plain")
|
else if(type == "text/plain")
|
||||||
|
{
|
||||||
|
if(strsfx.endsWith( "txt"))
|
||||||
FileUtils::getTxtContent(path,textContent);
|
FileUtils::getTxtContent(path,textContent);
|
||||||
|
}
|
||||||
|
else if(type == "application/x-ole-storage")
|
||||||
|
{
|
||||||
|
if (strsfx.endsWith("doc"))
|
||||||
|
{
|
||||||
|
KBinaryParser searchdata;
|
||||||
|
searchdata.RunParser(path,textContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#define FILEREADER_H
|
#define FILEREADER_H
|
||||||
|
|
||||||
#include <QObject>
|
#include <QObject>
|
||||||
|
#include <QFileInfo>
|
||||||
|
|
||||||
class FileReader : public QObject
|
class FileReader : public QObject
|
||||||
{
|
{
|
||||||
|
|
|
@ -63,7 +63,7 @@ private:
|
||||||
QQueue<QVector<QString>>* q_index;
|
QQueue<QVector<QString>>* q_index;
|
||||||
QQueue<QString>* q_content_index;
|
QQueue<QString>* q_content_index;
|
||||||
const QVector<QString> targetFileTypeVec ={
|
const QVector<QString> targetFileTypeVec ={
|
||||||
// QString(".doc"),
|
QString(".doc"),
|
||||||
QString(".docx"),
|
QString(".docx"),
|
||||||
// QString(".ppt"),
|
// QString(".ppt"),
|
||||||
// QString(".pptx"),
|
// QString(".pptx"),
|
||||||
|
|
|
@ -61,7 +61,7 @@ private:
|
||||||
|
|
||||||
QMap<int, QString> currentPath;
|
QMap<int, QString> currentPath;
|
||||||
const QVector<QString> targetFileTypeVec ={
|
const QVector<QString> targetFileTypeVec ={
|
||||||
// QString(".doc"),
|
QString(".doc"),
|
||||||
QString(".docx"),
|
QString(".docx"),
|
||||||
// QString(".ppt"),
|
// QString(".ppt"),
|
||||||
// QString(".pptx"),
|
// QString(".pptx"),
|
||||||
|
|
|
@ -22,6 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
||||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||||
|
|
||||||
include(index/index.pri)
|
include(index/index.pri)
|
||||||
|
# Pull in the parser sub-project (parser/parser.pri).
include(parser/parser.pri)
|
||||||
include(appsearch/appsearch.pri)
|
include(appsearch/appsearch.pri)
|
||||||
include(settingsearch/settingsearch.pri)
|
include(settingsearch/settingsearch.pri)
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,116 @@
|
||||||
|
#ifndef SEARCHHELPER_H
|
||||||
|
#define SEARCHHELPER_H
|
||||||
|
#include <QtCore>
|
||||||
|
#include <QtConcurrent/QtConcurrent>
|
||||||
|
|
||||||
|
/*
 * Integer aliases used by the parser declarations in this header.
 * They are typedefs rather than object-like macros so the names follow normal
 * scoping rules, can be used in functional casts such as ULONG(x), and do not
 * textually rewrite an identical identifier that appears in another header.
 */
typedef unsigned long ULONG;
typedef unsigned char UCHAR;
typedef unsigned short USHORT;
|
||||||
|
|
||||||
|
/* Document kind tag; stored in the `type` member of ppsInfoType. */
enum TYPE
{
    Word  = 0,
    Excel = 1,
    Ppt   = 2
};
|
||||||
|
|
||||||
|
/* Property Set Storage */
|
||||||
|
typedef struct pps_tag
|
||||||
|
{
|
||||||
|
ULONG ulSB;
|
||||||
|
ULONG ulSize;
|
||||||
|
} ppsTag;
|
||||||
|
|
||||||
|
typedef struct pps_info_tag
|
||||||
|
{
|
||||||
|
ppsTag tWordDocument; /* Text stream */
|
||||||
|
ppsTag tWorkBook;
|
||||||
|
ppsTag tPPTDocument;
|
||||||
|
ppsTag tData; /* Data stream */
|
||||||
|
ppsTag tTable; /* Table stream */
|
||||||
|
ppsTag tSummaryInfo; /* Summary Information */
|
||||||
|
ppsTag tDocSummaryInfo;/* Document Summary Information */
|
||||||
|
ppsTag t0Table; /* Table 0 stream */
|
||||||
|
ppsTag t1Table; /* Table 1 stream */
|
||||||
|
ppsTag tCurrentUser;
|
||||||
|
TYPE type;
|
||||||
|
} ppsInfoType;
|
||||||
|
|
||||||
|
/* Private type for Property Set Storage entries */
|
||||||
|
typedef struct pps_entry_tag
|
||||||
|
{
|
||||||
|
ULONG ulNext;
|
||||||
|
ULONG ulPrevious;
|
||||||
|
ULONG ulDir;
|
||||||
|
ULONG ulSB;
|
||||||
|
ULONG ulSize;
|
||||||
|
int iLevel;
|
||||||
|
char szName[32];
|
||||||
|
UCHAR ucType;
|
||||||
|
} ppsEntryType;
|
||||||
|
|
||||||
|
/* Excel Record Struct*/
|
||||||
|
//typedef struct excelRecord
|
||||||
|
//{
|
||||||
|
// excelRecord()
|
||||||
|
// {
|
||||||
|
// usLen = 0;
|
||||||
|
// usRichLen = 0;
|
||||||
|
// ulWLen = 0;
|
||||||
|
// bUni = false;
|
||||||
|
// }
|
||||||
|
// ushort usLen;
|
||||||
|
// ushort usRichLen;
|
||||||
|
// ulong ulWLen;
|
||||||
|
// bool bUni;
|
||||||
|
//} excelRecord;
|
||||||
|
|
||||||
|
/*
 * Parameter bundle passed by reference to KBinaryParser::readData().
 *
 * The default constructor zeroes every field and nulls both pointers. This
 * struct only stores the pointers; nothing here opens, closes or frees them.
 *
 * Field types are spelled as built-in types. They are the same types as the
 * `ulong` / `ushort` aliases, so existing users of the fields are unaffected.
 */
typedef struct readDataParam
{
    readDataParam()
        : ulStBlk(0),
          pFile(nullptr),
          ulBBd(nullptr),
          tBBdLen(0),
          usBlkSize(0)
    {
    }

    unsigned long   ulStBlk;    /* NOTE(review): presumably the first block to read - confirm */
    FILE           *pFile;      /* file handle to read from */
    unsigned long  *ulBBd;      /* NOTE(review): presumably the block table, tBBdLen entries - confirm */
    size_t          tBBdLen;
    unsigned short  usBlkSize;  /* NOTE(review): presumably the block size in bytes - confirm */
} rdPara;
|
||||||
|
|
||||||
|
class KBinaryParser :public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
KBinaryParser(QObject *parent=0);
|
||||||
|
~KBinaryParser();
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool RunParser(QString strFile,QString &content);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool bGetPPS(FILE *pFile,
|
||||||
|
const ULONG *aulRootList, size_t tRootListLen, ppsInfoType *pPPS);
|
||||||
|
|
||||||
|
int readData(rdPara &readParam, uchar *aucBuffer, ulong ulOffset, size_t tToRead);
|
||||||
|
|
||||||
|
int InitDocOle(FILE *pFile,long lFilesize,QString &content);
|
||||||
|
bool read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
||||||
|
const ULONG *aulBBD, size_t tBBDLen,
|
||||||
|
const ULONG *aulSBD, size_t tSBDLen,
|
||||||
|
const UCHAR *aucHeader,QString &content);
|
||||||
|
|
||||||
|
// int readSSTRecord(readDataParam &rdParam, ppsInfoType, ulong &ulOff, ushort usPartLen);
|
||||||
|
// int read8BiffRecord(uchar uFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR);
|
||||||
|
|
||||||
|
// ULONG readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD,
|
||||||
|
// size_t tBBDLen, ULONG ulPos);
|
||||||
|
|
||||||
|
QString m_strFileName;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // SEARCHHELPER_H
|
|
@ -0,0 +1,25 @@
|
||||||
|
#ifndef COMMON_H
|
||||||
|
#define COMMON_H
|
||||||
|
#include <QtCore>
|
||||||
|
#include <QtConcurrent/QtConcurrent>
|
||||||
|
|
||||||
|
#define SERVER "Everything"
|
||||||
|
|
||||||
|
// Logging hook, currently disabled: the trailing backslash splices the
// commented-out qWarning() line onto the directive, so LOG(a) expands to
// nothing. Do not insert anything between the two lines.
#define LOG(a) \
|
||||||
|
//qWarning() << a;
|
||||||
|
|
||||||
|
/*
 * Hash update step: subtracts `(a) << sl_minus_1` from hashHaystack when the
 * shift count is smaller than the bit width of int, then doubles hashHaystack.
 *
 * Expects `sl_minus_1` and `hashHaystack` to be in scope where it is expanded.
 * NOTE(review): looks like the rolling-hash step of a substring search -
 * confirm at the call sites.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement. The
 * former two-statement form ran the final shift unconditionally under an
 * unbraced `if` and did not compile when followed by `else`.
 */
#define REHASH(a) \
    do { \
        if (sl_minus_1 < static_cast<int>(sizeof(int)) * CHAR_BIT) \
            hashHaystack -= (a) << sl_minus_1; \
        hashHaystack <<= 1; \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Memory helper declarations; the definitions are not part of this header.
 * NOTE(review): by name these wrap malloc/calloc/realloc/free - confirm the
 * failure behaviour and what xfree() returns in the implementation.
 */
void *xmalloc(size_t tSize);
void *xcalloc(size_t tNmemb, size_t tSize);
void *xrealloc(void *pvArg, size_t tSize);
void *xfree(void *pvArg);
|
||||||
|
|
||||||
|
|
||||||
|
#endif // COMMON_H
|
|
@ -0,0 +1,9 @@
|
||||||
|
# qmake fragment: adds this directory to the include path and registers its
# headers and sources with whichever project includes it.
INCLUDEPATH += $$PWD
|
||||||
|
|
||||||
|
HEADERS += \
|
||||||
|
$$PWD/common.h \
|
||||||
|
$$PWD/binary-parser.h
|
||||||
|
|
||||||
|
|
||||||
|
SOURCES += \
|
||||||
|
$$PWD/binary-parser.cpp
|
Loading…
Reference in New Issue