Add support for '.xls', '.dot', '.wps', '.pps', '.dps', '.et', '.ppt'

This commit is contained in:
iaom 2021-04-06 02:21:40 +08:00
parent d050d4ea28
commit 3233e4af5c
7 changed files with 243 additions and 219 deletions

View File

@ -44,7 +44,8 @@ void FileReader::getTextContent(QString path, QString &textContent)
} }
else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage")
{ {
if (strsfx.endsWith("doc")) if (strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
strsfx.endsWith("pps") ||strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls"))
{ {
KBinaryParser searchdata; KBinaryParser searchdata;
searchdata.RunParser(path,textContent); searchdata.RunParser(path,textContent);

View File

@ -28,6 +28,7 @@ class FileReader : public QObject
Q_OBJECT Q_OBJECT
public: public:
explicit FileReader(QObject *parent = nullptr); explicit FileReader(QObject *parent = nullptr);
~FileReader()=default;
static void getTextContent(QString path, QString &textContent); static void getTextContent(QString path, QString &textContent);
}; };

View File

@ -65,11 +65,16 @@ private:
const QVector<QString> targetFileTypeVec ={ const QVector<QString> targetFileTypeVec ={
QString(".doc"), QString(".doc"),
QString(".docx"), QString(".docx"),
// QString(".ppt"), QString(".ppt"),
// QString(".pptx"), // QString(".pptx"),
// QString(".xls"), QString(".xls"),
// QString(".xlsx"), // QString(".xlsx"),
QString(".txt")}; QString(".txt"),
QString(".dot"),
QString(".wps"),
QString(".pps"),
QString(".dps"),
QString(".et")};
//xapian will auto commit per 10,000 changes, do not change it!!! //xapian will auto commit per 10,000 changes, do not change it!!!
const size_t u_send_length = 8192; const size_t u_send_length = 8192;

View File

@ -63,11 +63,16 @@ private:
const QVector<QString> targetFileTypeVec ={ const QVector<QString> targetFileTypeVec ={
QString(".doc"), QString(".doc"),
QString(".docx"), QString(".docx"),
// QString(".ppt"), QString(".ppt"),
// QString(".pptx"), // QString(".pptx"),
// QString(".xls"), QString(".xls"),
// QString(".xlsx"), // QString(".xlsx"),
QString(".txt")}; QString(".txt"),
QString(".dot"),
QString(".wps"),
QString(".pps"),
QString(".dps"),
QString(".et")};
}; };
#endif // INOTIFYINDEX_H #endif // INOTIFYINDEX_H

View File

@ -5058,9 +5058,10 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
if (bUsesUnicode) if (bUsesUnicode)
{ {
ushort* usAucData = (ushort*)ptaucBytes; ushort* usAucData = (ushort*)ptaucBytes;
content.append(QString::fromUtf16(usAucData).replace("\r","")); content.append(QString::fromUtf16(usAucData).replace("\r",""));
usAucData = (ushort*)xfree((void*)usAucData); usAucData = (ushort*)xfree((void*)usAucData);
ptaucBytes = NULL;
if(content.length() >= 682666) //20480000/3 if(content.length() >= 682666) //20480000/3
break; break;
} }
@ -5080,176 +5081,186 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
return false; return false;
}/* end of read8DocText */ }/* end of read8DocText */
//int KBinaryParser::readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info, ulong &ulOff, ushort usPartLen) int KBinaryParser:: readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info, ulong &ulOff, ushort usPartLen, QString &content)
//{ {
// UCHAR chSizeData[8]; UCHAR chSizeData[8];
// if (readData(rdParam, chSizeData, ulOff, 8) != 0) if (readData(rdParam, chSizeData, ulOff, 8) != 0)
// return -1; return -1;
// ulOff += 8; ulOff += 8;
// usPartLen -= 8; usPartLen -= 8;
// ulong ulSize = ulGetLong(0x04, chSizeData) + 1; ulong ulSize = ulGetLong(0x04, chSizeData) + 1;
// ulong ulCount = 1; ulong ulCount = 1;
// ulong ulNextOff = 0; ulong ulNextOff = 0;
// while ((ulCount < ulSize) && (ulOff < PPS_info.tWorkBook.ulSize)) while ((ulCount < ulSize) && (ulOff < PPS_info.tWorkBook.ulSize))
// { {
// UCHAR chHeader[3]; UCHAR chHeader[3];
// if (readData(rdParam, chHeader, ulOff + ulNextOff, 3) != 0) if (readData(rdParam, chHeader, ulOff + ulNextOff, 3) != 0)
// break; break;
// ushort uscharlen = usGetWord(0x00, chHeader); ushort uscharlen = usGetWord(0x00, chHeader);
// ushort usCharByteLen = uscharlen; ushort usCharByteLen = uscharlen;
// UCHAR ucFlag = ucGetByte(0x02, chHeader); UCHAR ucFlag = ucGetByte(0x02, chHeader);
// ulNextOff += 3; ulNextOff += 3;
// excelRecord eRrd; excelRecord eRrd;
// if (read8BiffRecord(ucFlag, ulOff, ulNextOff, rdParam, eRrd) != 0) if (read8BiffRecord(ucFlag, ulOff, ulNextOff, rdParam, eRrd) != 0)
// break; break;
// ushort ustotalLen = uscharlen + eRrd.usRichLen * 4 + eRrd.ulWLen; ushort ustotalLen = uscharlen + eRrd.usRichLen * 4 + eRrd.ulWLen;
// if (!eRrd.bUni) if (!eRrd.bUni)
// ustotalLen += uscharlen; ustotalLen += uscharlen;
// UCHAR* chData= (UCHAR*)xmalloc(ustotalLen); UCHAR* chData= (UCHAR*)xmalloc(ustotalLen);
// if (ulNextOff < usPartLen && (ulNextOff + ustotalLen) >= usPartLen) if (ulNextOff < usPartLen && (ulNextOff + ustotalLen) >= usPartLen)
// { {
// ushort usIdf = usPartLen - ulNextOff; ushort usIdf = usPartLen - ulNextOff;
// uchar chTemp[MAX_BUFF_SIZE]; uchar chTemp[MAX_BUFF_SIZE];
// memset(chTemp, 0 ,MAX_BUFF_SIZE); memset(chTemp, 0 ,MAX_BUFF_SIZE);
// if (readData(rdParam, chTemp, ulOff + ulNextOff, usIdf + 5) != 0) if (readData(rdParam, chTemp, ulOff + ulNextOff, usIdf + 5) != 0)
// break; break;
// bool bTemp = false; bool bTemp = false;
// ulOff += usPartLen; ulOff += usPartLen;
// ulOff += 4; ulOff += 4;
// memcpy(&usPartLen, chTemp + usIdf + 2, 2); memcpy(&usPartLen, chTemp + usIdf + 2, 2);
// ushort usOthTxtLen = ustotalLen - usIdf; ushort usOthTxtLen = ustotalLen - usIdf;
// bool bAnotherCompare = (usOthTxtLen == 0 || (usCharByteLen - usIdf) == 0) || usCharByteLen < usIdf; bool bAnotherCompare = (usOthTxtLen == 0 || (usCharByteLen - usIdf) == 0) || usCharByteLen < usIdf;
// ulong ulNoUse = 0; ulong ulNoUse = 0;
// bool bUniFlg = false; bool bUniFlg = false;
// if (!bAnotherCompare) if (!bAnotherCompare)
// { {
// uchar chFlag; uchar chFlag;
// memcpy(&chFlag, chTemp + usIdf + 4, 1); memcpy(&chFlag, chTemp + usIdf + 4, 1);
// if (chFlag == 0x00 || chFlag == 0x01 || chFlag == 0x05 || chFlag == 0x09 || chFlag == 0x08 || chFlag == 0x04 || chFlag == 0x0c) if (chFlag == 0x00 || chFlag == 0x01 || chFlag == 0x05 || chFlag == 0x09 || chFlag == 0x08 || chFlag == 0x04 || chFlag == 0x0c)
// { {
// bTemp = true; bTemp = true;
// ulOff ++; ulOff ++;
// ulong ulNextTep = 0; ulong ulNextTep = 0;
// excelRecord eRTmp; excelRecord eRTmp;
// if (read8BiffRecord(chFlag, ulOff, ulNextTep, rdParam, eRTmp) != 0) if (read8BiffRecord(chFlag, ulOff, ulNextTep, rdParam, eRTmp) != 0)
// break; break;
// ulOff += ulNextTep; ulOff += ulNextTep;
// bUniFlg = eRTmp.bUni; bUniFlg = eRTmp.bUni;
// ulNoUse = eRTmp.usRichLen * 4 + eRTmp.ulWLen; ulNoUse = eRTmp.usRichLen * 4 + eRTmp.ulWLen;
// } }
// } }
// memcpy(chData, chTemp, usIdf); memcpy(chData, chTemp, usIdf);
// ulNextOff = 0; ulNextOff = 0;
// ustotalLen = usOthTxtLen + ulNoUse; ustotalLen = usOthTxtLen + ulNoUse;
// if (usOthTxtLen > 0) if (usOthTxtLen > 0)
// { {
// memset(chTemp, 0 ,MAX_BUFF_SIZE); memset(chTemp, 0 ,MAX_BUFF_SIZE);
// if (readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0) if (readData(rdParam, chTemp, ulOff, usOthTxtLen) != 0)
// return -1; return -1;
// memcpy(chData + usIdf , chTemp , usOthTxtLen); memcpy(chData + usIdf , chTemp , usOthTxtLen);
// } }
// if (bTemp) if (bTemp)
// usPartLen --; usPartLen --;
// } }
// else else
// { {
// if (readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0) if (readData(rdParam, chData, ulOff + ulNextOff, ustotalLen) != 0)
// break; break;
// } }
// if (eRrd.bUni) if (eRrd.bUni)
// { {
//// QtConcurrent::run(this, &KBinaryParser::compare2Word, (const char*)chData, m_strKey, m_strFileName); qDebug()<<QString((const char*)chData);
// } chData = (UCHAR*)xfree((void*)chData);
// else qWarning()<<"Unsupport excel type:"<<m_strFileName;
// { }
// ushort* usData = (ushort*)chData; else
// qDebug() << QString::fromUtf16(usData); {
//// QtConcurrent::run(this, &KBinaryParser::compare2UsWord, usData, uscharlen, m_strKey, m_strKey.length(), m_strFileName); ushort* usData = (ushort*)chData;
// }
// ulNextOff += ustotalLen;
// ulCount += 1;
// }
// if (ulCount >= ulSize) content.append(QString::fromUtf16(usData).replace("\r",""));
// return -1; usData = (ushort*)xfree((void*)usData);
//} chData = NULL;
if(content.length() >= 682666) //20480000/3
break;
}
ulNextOff += ustotalLen;
ulCount += 1;
}
//int KBinaryParser::read8BiffRecord(uchar ucFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR) if (ulCount >= ulSize)
//{ return -1;
// bool butf8 = true; }
// if (ucFlag & 0x08)
// {
// uchar chiRich[2];
// if (readData(rdParam, chiRich, ulOff + ulNext, 2) != 0)
// return -1;
// eR.usRichLen = usGetWord(0x00, chiRich);
// ulNext += 2;
// }
// if(ucFlag & 0x04)
// {
// uchar chExt[4];
// if (readData(rdParam, chExt, ulOff + ulNext, 4) != 0)
// return -1;
// eR.ulWLen = ulGetLong(0x00, chExt);
// ulNext += 4;
// }
// if ((ucFlag & 0x01))
// {
// butf8 = false;
// }
// eR.bUni = butf8;
// return 0;
//}
//ULONG KBinaryParser::readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD, size_t tBBDLen, ULONG ulPos) int KBinaryParser::read8BiffRecord(uchar ucFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR)
//{ {
// UCHAR aucHeader[PPT_RECORD_HEADER]; bool butf8 = true;
// ULONG ulOff = ulPos; if (ucFlag & 0x08)
// /* Read the headerblock */ {
// if (!bReadBuffer(pFile, PPS_info->tPPTDocument.ulSB, uchar chiRich[2];
// aulBBD, tBBDLen, BIG_BLOCK_SIZE, if (readData(rdParam, chiRich, ulOff + ulNext, 2) != 0)
// aucHeader, ulOff, PPT_RECORD_HEADER)) return -1;
// return -1; eR.usRichLen = usGetWord(0x00, chiRich);
ulNext += 2;
}
if(ucFlag & 0x04)
{
uchar chExt[4];
if (readData(rdParam, chExt, ulOff + ulNext, 4) != 0)
return -1;
eR.ulWLen = ulGetLong(0x00, chExt);
ulNext += 4;
}
if ((ucFlag & 0x01))
{
butf8 = false;
}
eR.bUni = butf8;
return 0;
}
// ulOff += PPT_RECORD_HEADER; ULONG KBinaryParser::readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD, size_t tBBDLen, ULONG ulPos,QString &content)
// USHORT usVersion = usGetWord(0x00, aucHeader); {
// USHORT usType = usGetWord(0x02, aucHeader); UCHAR aucHeader[PPT_RECORD_HEADER];
// ULONG ulLen = ulGetLong(0x04, aucHeader); ULONG ulOff = ulPos;
// USHORT usVer = usVersion & 0xF; /* Read the headerblock */
// if (usVer == 0xF) if (!bReadBuffer(pFile, PPS_info->tPPTDocument.ulSB,
// { aulBBD, tBBDLen, BIG_BLOCK_SIZE,
// while (ulOff < ulLen) aucHeader, ulOff, PPT_RECORD_HEADER))
// { return -1;
// ulOff = readPPtRecord(pFile, PPS_info, aulBBD, tBBDLen, ulOff);
// }
// }
// else
// {
// if (usType == PPT_TEXTBYTEATOM || usType == PPT_TEXTCHARATOM)
// {
// long llen = (long)ulLen;
// UCHAR* chData = (UCHAR*)xmalloc(llen);
// if (!bReadBuffer(pFile, PPS_info->tPPTDocument.ulSB,
// aulBBD, tBBDLen, BIG_BLOCK_SIZE,
// chData, ulOff, llen))
// return -1;
// ushort* usData = (ushort*)chData;
// qDebug() << QString::fromUtf16(usData);
//// QtConcurrent::run(this, &KBinaryParser::compare2UsWord, usData, llen / 2, strKey, strKey.length(), m_strFileName); ulOff += PPT_RECORD_HEADER;
// } USHORT usVersion = usGetWord(0x00, aucHeader);
// ulOff += ulLen; USHORT usType = usGetWord(0x02, aucHeader);
// } ULONG ulLen = ulGetLong(0x04, aucHeader);
// return ulOff; USHORT usVer = usVersion & 0xF;
//} if (usVer == 0xF)
{
while (ulOff < ulLen)
{
ulOff = readPPtRecord(pFile, PPS_info, aulBBD, tBBDLen, ulOff,content);
}
}
else
{
if (usType == PPT_TEXTBYTEATOM || usType == PPT_TEXTCHARATOM)
{
long llen = (long)ulLen;
UCHAR* chData = (UCHAR*)xmalloc(llen);
if (!bReadBuffer(pFile, PPS_info->tPPTDocument.ulSB,
aulBBD, tBBDLen, BIG_BLOCK_SIZE,
chData, ulOff, llen))
return -1;
ushort* usData = (ushort*)chData;
content.append(QString::fromUtf16(usData).replace("\r",""));
usData = (ushort*)xfree((void*)usData);
chData = NULL;
}
ulOff += ulLen;
if(content.length() >= 682666) //20480000/3
return ulOff;
}
return ulOff;
}
int KBinaryParser::InitDocOle(FILE* pFile,long lFilesize,QString &content) int KBinaryParser::InitDocOle(FILE* pFile,long lFilesize,QString &content)
{ {
@ -5399,44 +5410,44 @@ int KBinaryParser::InitDocOle(FILE* pFile,long lFilesize,QString &content)
aulBBD, tBBDLen, aulSBD, tSBDLen, aulBBD, tBBDLen, aulSBD, tSBDLen,
aucHeader,content); aucHeader,content);
} }
else if (PPS_info.type == Excel)
{
readParam.ulStBlk = PPS_info.tWorkBook.ulSB;
UCHAR aucHeader[4];
ulong ulOff = 0;
if (readData(readParam, aucHeader, 0, 8) != 0)
return -1;
ulOff += 4;
USHORT usType = usGetWord(0x00, aucHeader);
while (ulOff < PPS_info.tWorkBook.ulSize)
{
USHORT usLen = usGetWord(0x02, aucHeader);
ulOff += usLen;
if (readData(readParam, aucHeader, ulOff, 4) != 0)
break;
ulOff += 4;
usType = usGetWord(0x00, aucHeader);
ushort usPartLen = usGetWord(0x02, aucHeader);
if (usType == 0x00FC)
{
if (readSSTRecord(readParam, PPS_info, ulOff, usPartLen,content) != 0)
break;
}
}
}
else if (PPS_info.type == Ppt)
{
ULONG ulOff = 0;
while (ulOff < PPS_info.tPPTDocument.ulSize)
{
ulOff = readPPtRecord(pFile, &PPS_info, aulBBD, tBBDLen, ulOff,content);
}
}
else else
{ {
qWarning()<<"Unsupport doc type:"<<m_strFileName; qWarning()<<"Unsupport doc type:"<<m_strFileName;
} }
// else if (PPS_info.type == Excel)
// {
// readParam.ulStBlk = PPS_info.tWorkBook.ulSB;
// UCHAR aucHeader[4];
// ulong ulOff = 0;
// if (readData(readParam, aucHeader, 0, 8) != 0)
// return -1;
// ulOff += 4;
// USHORT usType = usGetWord(0x00, aucHeader);
// while (ulOff < PPS_info.tWorkBook.ulSize)
// {
// USHORT usLen = usGetWord(0x02, aucHeader);
// ulOff += usLen;
// if (readData(readParam, aucHeader, ulOff, 4) != 0)
// break;
// ulOff += 4;
// usType = usGetWord(0x00, aucHeader);
// ushort usPartLen = usGetWord(0x02, aucHeader);
// if (usType == 0x00FC)
// {
// if (readSSTRecord(readParam, PPS_info, ulOff, usPartLen) != 0)
// break;
// }
// }
// }
// else if (PPS_info.type == Ppt)
// {
// ULONG ulOff = 0;
// while (ulOff < PPS_info.tPPTDocument.ulSize)
// {
// ulOff = readPPtRecord(pFile, &PPS_info, aulBBD, tBBDLen, ulOff,);
// }
// }
return 0; return 0;
} }

View File

@ -50,20 +50,20 @@ typedef struct pps_entry_tag
} ppsEntryType; } ppsEntryType;
/* Excel Record Struct*/ /* Excel Record Struct*/
//typedef struct excelRecord typedef struct excelRecord
//{ {
// excelRecord() excelRecord()
// { {
// usLen = 0; usLen = 0;
// usRichLen = 0; usRichLen = 0;
// ulWLen = 0; ulWLen = 0;
// bUni = false; bUni = false;
// } }
// ushort usLen; ushort usLen;
// ushort usRichLen; ushort usRichLen;
// ulong ulWLen; ulong ulWLen;
// bool bUni; bool bUni;
//} excelRecord; } excelRecord;
typedef struct readDataParam typedef struct readDataParam
{ {
@ -104,11 +104,11 @@ private:
const ULONG *aulSBD, size_t tSBDLen, const ULONG *aulSBD, size_t tSBDLen,
const UCHAR *aucHeader,QString &content); const UCHAR *aucHeader,QString &content);
// int readSSTRecord(readDataParam &rdParam, ppsInfoType, ulong &ulOff, ushort usPartLen); int readSSTRecord(readDataParam &rdParam, ppsInfoType, ulong &ulOff, ushort usPartLen,QString &content);
// int read8BiffRecord(uchar uFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR); int read8BiffRecord(uchar uFlag, ulong ulOff, ulong &ulNext, readDataParam &rdParam, excelRecord &eR);
// ULONG readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD, ULONG readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* aulBBD,
// size_t tBBDLen, ULONG ulPos); size_t tBBDLen, ULONG ulPos,QString &content);
QString m_strFileName; QString m_strFileName;
}; };

View File

@ -112,6 +112,7 @@ void ContentWidget::initListView()
m_appListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Apps); m_appListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Apps);
m_bestListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Best); m_bestListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Best);
m_webListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Web); m_webListView = new SearchListView(m_resultList, QStringList(), SearchItem::SearchType::Web);
setupConnect(m_fileListView); setupConnect(m_fileListView);
setupConnect(m_dirListView); setupConnect(m_dirListView);
setupConnect(m_contentListView); setupConnect(m_contentListView);