forked from openkylin/ukui-search
Merge pull request #156 from iaom/0301-dev
Add support for 'doc' file in file content search.
This commit is contained in:
commit
c79998301f
|
@ -28,8 +28,6 @@
|
|||
#include "quazip/quazip.h"
|
||||
#include <quazip/quazipfile.h>
|
||||
#include <QDomDocument>
|
||||
#include <QMimeDatabase>
|
||||
#include <QMimeType>
|
||||
#include <QQueue>
|
||||
#include "uchardet/uchardet.h"
|
||||
|
||||
|
@ -211,14 +209,12 @@ void FileUtils::loadHanziTable(const QString &fileName)
|
|||
return;
|
||||
}
|
||||
|
||||
QString FileUtils::getMimetype(QString &path, bool getsuffix)
|
||||
QMimeType FileUtils::getMimetype(QString &path)
|
||||
{
|
||||
QMimeDatabase mdb;
|
||||
QMimeType type = mdb.mimeTypeForFile(path,QMimeDatabase::MatchContent);
|
||||
if(getsuffix)
|
||||
return type.name();
|
||||
else
|
||||
return type.preferredSuffix();
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
//aborted
|
||||
|
@ -559,7 +555,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
|||
const char *codec = uchardet_get_charset(chardet);
|
||||
|
||||
if(QTextCodec::codecForName(codec) == 0)
|
||||
qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(codec)<<"zpf666";
|
||||
qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(codec);
|
||||
|
||||
QTextStream stream(encodedString,QIODevice::ReadOnly);
|
||||
stream.setCodec(codec);
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include <QCryptographicHash>
|
||||
#include <QIcon>
|
||||
#include <QMap>
|
||||
#include <QMimeDatabase>
|
||||
#include <QMimeType>
|
||||
#include "libsearch_global.h"
|
||||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
|
@ -59,7 +61,7 @@ public:
|
|||
static void loadHanziTable(const QString&);
|
||||
|
||||
//parse text,docx.....
|
||||
static QString getMimetype(QString &path, bool getsuffix = false);
|
||||
static QMimeType getMimetype(QString &path);
|
||||
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||
static void getTxtContent(QString &path, QString &textcontent);
|
||||
static size_t _max_index_count;
|
||||
|
|
|
@ -122,6 +122,8 @@ void ConstructDocumentForContent::run()
|
|||
_doc_list_content = new QList<Document>;
|
||||
QString content;
|
||||
FileReader::getTextContent(m_path,content);
|
||||
if(content.isEmpty())
|
||||
return;
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
#include "file-reader.h"
|
||||
#include "file-utils.h"
|
||||
#include "binary-parser.h"
|
||||
|
||||
FileReader::FileReader(QObject *parent) : QObject(parent)
|
||||
{
|
||||
|
@ -27,13 +28,32 @@ FileReader::FileReader(QObject *parent) : QObject(parent)
|
|||
|
||||
/**
 * @brief Extract the text of the file at @p path into @p textContent.
 *
 * The file is classified first by its MIME type (detected from the file
 * content by FileUtils::getMimetype) and by its file-name suffix; both must
 * agree before a parser is chosen:
 *   - application/zip + "docx"            -> FileUtils::getDocxTextContent
 *   - text/plain + "txt"                  -> FileUtils::getTxtContent
 *   - inherits application/msword + "doc" -> KBinaryParser::RunParser
 * Any other combination is reported with a warning and @p textContent is
 * left untouched.
 *
 * @param path        Path of the file to read.
 * @param textContent Output string handed to the selected parser.
 */
void FileReader::getTextContent(QString path, QString &textContent)
{
    // Classify the file before picking a parser.
    QMimeType type = FileUtils::getMimetype(path);
    const QString name = type.name();
    const QString strsfx = QFileInfo(path).suffix();

    // The suffix is compared for equality: endsWith("doc") would also accept
    // unrelated suffixes such as "xdoc".
    if (name == "application/zip" && strsfx == "docx") {
        FileUtils::getDocxTextContent(path, textContent);
    } else if (name == "text/plain" && strsfx == "txt") {
        FileUtils::getTxtContent(path, textContent);
    } else if (type.inherits("application/msword") && strsfx == "doc") {
        KBinaryParser searchdata;
        searchdata.RunParser(path, textContent);
    } else {
        // Also reached when the MIME type matches but the suffix does not,
        // so a skipped file is never dropped silently.
        qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
    }
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#define FILEREADER_H
|
||||
|
||||
#include <QObject>
|
||||
#include <QFileInfo>
|
||||
|
||||
class FileReader : public QObject
|
||||
{
|
||||
|
|
|
@ -63,7 +63,7 @@ private:
|
|||
QQueue<QVector<QString>>* q_index;
|
||||
QQueue<QString>* q_content_index;
|
||||
const QVector<QString> targetFileTypeVec ={
|
||||
// QString(".doc"),
|
||||
QString(".doc"),
|
||||
QString(".docx"),
|
||||
// QString(".ppt"),
|
||||
// QString(".pptx"),
|
||||
|
|
|
@ -61,7 +61,7 @@ private:
|
|||
|
||||
QMap<int, QString> currentPath;
|
||||
const QVector<QString> targetFileTypeVec ={
|
||||
// QString(".doc"),
|
||||
QString(".doc"),
|
||||
QString(".docx"),
|
||||
// QString(".ppt"),
|
||||
// QString(".pptx"),
|
||||
|
|
|
@ -22,6 +22,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
|||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||
|
||||
# Pull in the per-directory project fragments.
include(index/index.pri)
include(parser/parser.pri)
include(appsearch/appsearch.pri)
include(settingsearch/settingsearch.pri)
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,116 @@
|
|||
#ifndef SEARCHHELPER_H
|
||||
#define SEARCHHELPER_H
|
||||
#include <QtCore>
|
||||
#include <QtConcurrent/QtConcurrent>
|
||||
|
||||
/* Fixed spellings for the unsigned integer types used by the parser.
 * These are typedefs rather than object-like macros so that they obey
 * normal scoping and cannot rewrite unrelated identifiers in including files. */
typedef unsigned long ULONG;
typedef unsigned char UCHAR;
typedef unsigned short USHORT;
|
||||
|
||||
/* Document kind tag; stored in ppsInfoType::type. */
enum TYPE {
    Word = 0,
    Excel = 1,
    Ppt = 2
};
|
||||
|
||||
/* Property Set Storage */
|
||||
typedef struct pps_tag
|
||||
{
|
||||
ULONG ulSB;
|
||||
ULONG ulSize;
|
||||
} ppsTag;
|
||||
|
||||
typedef struct pps_info_tag
|
||||
{
|
||||
ppsTag tWordDocument; /* Text stream */
|
||||
ppsTag tWorkBook;
|
||||
ppsTag tPPTDocument;
|
||||
ppsTag tData; /* Data stream */
|
||||
ppsTag tTable; /* Table stream */
|
||||
ppsTag tSummaryInfo; /* Summary Information */
|
||||
ppsTag tDocSummaryInfo;/* Document Summary Information */
|
||||
ppsTag t0Table; /* Table 0 stream */
|
||||
ppsTag t1Table; /* Table 1 stream */
|
||||
ppsTag tCurrentUser;
|
||||
TYPE type;
|
||||
} ppsInfoType;
|
||||
|
||||
/* Private type for Property Set Storage entries */
|
||||
typedef struct pps_entry_tag
|
||||
{
|
||||
ULONG ulNext;
|
||||
ULONG ulPrevious;
|
||||
ULONG ulDir;
|
||||
ULONG ulSB;
|
||||
ULONG ulSize;
|
||||
int iLevel;
|
||||
char szName[32];
|
||||
UCHAR ucType;
|
||||
} ppsEntryType;
|
||||
|
||||
/* Excel Record Struct*/
|
||||
//typedef struct excelRecord
|
||||
//{
|
||||
// excelRecord()
|
||||
// {
|
||||
// usLen = 0;
|
||||
// usRichLen = 0;
|
||||
// ulWLen = 0;
|
||||
// bUni = false;
|
||||
// }
|
||||
// ushort usLen;
|
||||
// ushort usRichLen;
|
||||
// ulong ulWLen;
|
||||
// bool bUni;
|
||||
//} excelRecord;
|
||||
|
||||
typedef struct readDataParam
|
||||
{
|
||||
readDataParam()
|
||||
{
|
||||
ulStBlk = 0;
|
||||
pFile = NULL;
|
||||
ulBBd = NULL;
|
||||
tBBdLen = 0;
|
||||
usBlkSize = 0;
|
||||
}
|
||||
ulong ulStBlk;
|
||||
FILE *pFile;
|
||||
ulong *ulBBd;
|
||||
size_t tBBdLen;
|
||||
ushort usBlkSize;
|
||||
}rdPara;
|
||||
|
||||
class KBinaryParser :public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
KBinaryParser(QObject *parent=0);
|
||||
~KBinaryParser();
|
||||
|
||||
public:
|
||||
bool RunParser(QString strFile,QString &content);
|
||||
|
||||
private:
|
||||
bool bGetPPS(FILE *pFile,
|
||||
const ULONG *aulRootList, size_t tRootListLen, ppsInfoType *pPPS);
|
||||
|
||||
int readData(rdPara &readParam, uchar *aucBuffer, ulong ulOffset, size_t tToRead);
|
||||
|
||||
int InitDocOle(FILE *pFile,long lFilesize,QString &content);
|
||||
bool read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
||||
const ULONG *aulBBD, size_t tBBDLen,
|
||||
const ULONG *aulSBD, size_t tSBDLen,
|
||||
const UCHAR *aucHeader,QString &content);
|
||||
|
||||
// int readSSTRecord(readDataParam &rdParam, ppsInfoType, ulong &ulOff, ushort usPartLen);
|
||||
// int read8BiffRecord(uchar uFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR);
|
||||
|
||||
// ULONG readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD,
|
||||
// size_t tBBDLen, ULONG ulPos);
|
||||
|
||||
QString m_strFileName;
|
||||
};
|
||||
|
||||
#endif // SEARCHHELPER_H
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
#include <QtCore>
|
||||
#include <QtConcurrent/QtConcurrent>
|
||||
|
||||
#define SERVER "Everything"

/* Logging is compiled out: LOG(a) expands to nothing.
 * To re-enable it, define the macro as:  qWarning() << a;  */
#define LOG(a)

/* Rolling-hash step. Expects the caller to have `sl_minus_1` and
 * `hashHaystack` in scope; subtracts (a) << sl_minus_1 when the shift is
 * within the bit width of int, then shifts hashHaystack left by one.
 * Wrapped in do { } while (0) so the two statements stay together when the
 * macro is used as the body of an unbraced if/else/loop. */
#define REHASH(a) \
    do { \
        if (sl_minus_1 < (int)sizeof(int) * CHAR_BIT) \
            hashHaystack -= (a) << sl_minus_1; \
        hashHaystack <<= 1; \
    } while (0)
|
||||
|
||||
/* Allocation helpers; only declared here, defined elsewhere.
 * NOTE(review): presumably checked wrappers around malloc / calloc /
 * realloc / free -- confirm the failure behaviour and what xfree() returns
 * against the definitions. */
void *xmalloc(size_t tSize);
void *xcalloc(size_t tNmemb, size_t tSize);
void *xrealloc(void *pvArg, size_t tSize);
void *xfree(void *pvArg);
|
||||
|
||||
|
||||
#endif // COMMON_H
|
|
@ -0,0 +1,9 @@
|
|||
INCLUDEPATH += $$PWD
|
||||
|
||||
HEADERS += \
|
||||
$$PWD/common.h \
|
||||
$$PWD/binary-parser.h
|
||||
|
||||
|
||||
SOURCES += \
|
||||
$$PWD/binary-parser.cpp
|
Loading…
Reference in New Issue