Optimized file content parser.
This commit is contained in:
parent
4a8076122e
commit
3daf5ceab9
|
@ -28,8 +28,6 @@
|
|||
#include "quazip/quazip.h"
|
||||
#include <quazip/quazipfile.h>
|
||||
#include <QDomDocument>
|
||||
#include <QMimeDatabase>
|
||||
#include <QMimeType>
|
||||
#include <QQueue>
|
||||
#include "uchardet/uchardet.h"
|
||||
|
||||
|
@ -211,14 +209,12 @@ void FileUtils::loadHanziTable(const QString &fileName)
|
|||
return;
|
||||
}
|
||||
|
||||
QString FileUtils::getMimetype(QString &path, bool getsuffix)
|
||||
QMimeType FileUtils::getMimetype(QString &path)
|
||||
{
|
||||
QMimeDatabase mdb;
|
||||
QMimeType type = mdb.mimeTypeForFile(path,QMimeDatabase::MatchContent);
|
||||
if(getsuffix)
|
||||
return type.name();
|
||||
else
|
||||
return type.preferredSuffix();
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
//aborted
|
||||
|
@ -559,7 +555,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
|||
const char *codec = uchardet_get_charset(chardet);
|
||||
|
||||
if(QTextCodec::codecForName(codec) == 0)
|
||||
qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(codec)<<"zpf666";
|
||||
qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(codec);
|
||||
|
||||
QTextStream stream(encodedString,QIODevice::ReadOnly);
|
||||
stream.setCodec(codec);
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include <QCryptographicHash>
|
||||
#include <QIcon>
|
||||
#include <QMap>
|
||||
#include <QMimeDatabase>
|
||||
#include <QMimeType>
|
||||
#include "libsearch_global.h"
|
||||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
|
@ -59,7 +61,7 @@ public:
|
|||
static void loadHanziTable(const QString&);
|
||||
|
||||
// Helpers for detecting a file's MIME type and extracting text from txt, docx, ... files
|
||||
static QString getMimetype(QString &path, bool getsuffix = false);
|
||||
static QMimeType getMimetype(QString &path);
|
||||
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||
static void getTxtContent(QString &path, QString &textcontent);
|
||||
static size_t _max_index_count;
|
||||
|
|
|
@ -28,20 +28,21 @@ FileReader::FileReader(QObject *parent) : QObject(parent)
|
|||
|
||||
void FileReader::getTextContent(QString path, QString &textContent)
|
||||
{
|
||||
QString type =FileUtils::getMimetype(path,true);
|
||||
QMimeType type = FileUtils::getMimetype(path);
|
||||
QString name = type.name();
|
||||
QFileInfo file(path);
|
||||
QString strsfx = file.suffix();
|
||||
if(type == "application/zip")
|
||||
if(name== "application/zip")
|
||||
{
|
||||
if(strsfx.endsWith( "docx"))
|
||||
FileUtils::getDocxTextContent(path,textContent);
|
||||
}
|
||||
else if(type == "text/plain")
|
||||
else if(name == "text/plain")
|
||||
{
|
||||
if(strsfx.endsWith( "txt"))
|
||||
FileUtils::getTxtContent(path,textContent);
|
||||
}
|
||||
else if(type == "application/x-ole-storage")
|
||||
else if(type.inherits("application/msword"))
|
||||
{
|
||||
if (strsfx.endsWith("doc"))
|
||||
{
|
||||
|
@ -49,6 +50,10 @@ void FileReader::getTextContent(QString path, QString &textContent)
|
|||
searchdata.RunParser(path,textContent);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -5060,11 +5060,13 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
|||
{
|
||||
ushort* usAucData = (ushort*)ptaucBytes;
|
||||
content.append(QString::fromUtf16(usAucData).replace("\r",""));
|
||||
usAucData = (ushort*)xfree((void*)usAucData);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: support more document formats
|
||||
qWarning()<<"Parser error:";
|
||||
ptaucBytes = (UCHAR*)xfree((void*)ptaucBytes);
|
||||
qWarning()<<"Parser error:"<<m_strFileName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5393,6 +5395,10 @@ int KBinaryParser::InitDocOle(FILE* pFile,long lFilesize,QString &content)
|
|||
aulBBD, tBBDLen, aulSBD, tSBDLen,
|
||||
aucHeader,content);
|
||||
}
|
||||
else
|
||||
{
|
||||
qWarning()<<"Unsupport doc type:"<<m_strFileName;
|
||||
}
|
||||
// else if (PPS_info.type == Excel)
|
||||
// {
|
||||
// readParam.ulStBlk = PPS_info.tWorkBook.ulSB;
|
||||
|
|
Loading…
Reference in New Issue