Add support for 'pptx'.

This commit is contained in:
iaom 2021-04-08 16:11:58 +08:00
parent 606a436f48
commit f1485deeac
5 changed files with 85 additions and 6 deletions

View File

@ -502,6 +502,7 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi)
*/
void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
{
//fix me :optimized by xpath??
QFileInfo info = QFileInfo(path);
if(!info.exists()||info.isDir())
return;
@ -517,6 +518,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
QDomDocument doc;
doc.setContent(fileR.readAll());
fileR.close();
QDomElement first = doc.firstChildElement("w:document");
QDomElement body = first.firstChildElement("w:body");
while(!body.isNull())
@ -529,7 +531,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
{
QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n",""));
if(textcontent.length() >= 682666) //20480000/3
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{
file.close();
return;
@ -544,13 +546,86 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
return;
}
/**
 * @brief Append the text runs of every slide in a .pptx archive to @p textcontent.
 *
 * The archive is opened with QuaZip and each "ppt/slides/slide<N>.xml" entry is
 * parsed with QDomDocument. Slides are visited in ascending order of <N>, and
 * the text of every <a:t> element reached through
 * p:sld / p:cSld / p:spTree / p:sp / p:txBody / a:p / a:r is appended.
 * Carriage returns are removed and tabs are replaced by a single space in each
 * appended run.
 *
 * Extraction stops as soon as @p textcontent reaches MAX_CONTENT_LENGTH/3
 * characters (presumably a budget for multi-byte encodings -- TODO confirm).
 *
 * @param path        Path of the .pptx file; nothing happens if it does not
 *                    exist, is a directory, or cannot be opened as a zip.
 * @param textcontent Output buffer; extracted text is appended to it.
 */
void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
{
    //fix me :optimized by xpath??
    QFileInfo info = QFileInfo(path);
    if(!info.exists()||info.isDir())
        return;
    QuaZip file(path);
    if(!file.open(QuaZip::mdUnzip))
        return;

    const QString prefix("ppt/slides/slide");
    const QString suffix(".xml");

    // Collect the slide numbers that are really present in the archive, kept
    // sorted ascending. Counting entries and assuming slide1..slideN would skip
    // slides whenever the numbering has gaps.
    QList<int> slideNumbers;
    const QStringList entries = file.getFileNameList();
    for(const QString &entry : entries)
    {
        if(!entry.startsWith(prefix) || !entry.endsWith(suffix))
            continue;
        bool ok = false;
        const int number = entry.mid(prefix.length(),
                                     entry.length() - prefix.length() - suffix.length()).toInt(&ok);
        if(!ok || number < 1)
            continue;
        int pos = 0;
        while(pos < slideNumbers.size() && slideNumbers.at(pos) < number)
            ++pos;
        // Different spellings of the same number (e.g. "slide01") must not
        // cause a slide to be read twice.
        if(pos < slideNumbers.size() && slideNumbers.at(pos) == number)
            continue;
        slideNumbers.insert(pos, number);
    }
    if(slideNumbers.isEmpty())
    {
        file.close();
        return;
    }

    for(int i = 0; i < slideNumbers.size(); ++i)
    {
        const QString name = prefix + QString::number(slideNumbers.at(i)) + suffix;
        if(!file.setCurrentFile(name))
            continue;
        QuaZipFile fileR(&file);
        if(!fileR.open(QIODevice::ReadOnly))
            continue;
        QDomDocument doc;
        // If parsing fails the document stays empty and the loops below are no-ops.
        doc.setContent(fileR.readAll());
        fileR.close();

        // Each level starts at the first element with the expected tag and then
        // walks all following sibling elements.
        QDomElement sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
        for(; !sptree.isNull(); sptree = sptree.nextSiblingElement())
        {
            for(QDomElement sp = sptree.firstChildElement("p:sp"); !sp.isNull(); sp = sp.nextSiblingElement())
            {
                for(QDomElement txbody = sp.firstChildElement("p:txBody"); !txbody.isNull(); txbody = txbody.nextSiblingElement())
                {
                    for(QDomElement ap = txbody.firstChildElement("a:p"); !ap.isNull(); ap = ap.nextSiblingElement())
                    {
                        for(QDomElement ar = ap.firstChildElement("a:r"); !ar.isNull(); ar = ar.nextSiblingElement())
                        {
                            // Clean only the new run; chaining replace() onto
                            // append() would rescan the whole buffer each time.
                            QString text = ar.firstChildElement("a:t").text();
                            text.replace("\r","").replace("\t"," ");
                            textcontent.append(text);
                            if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
                            {
                                file.close();
                                return;
                            }
                        }
                    }
                }
            }
        }
    }
    file.close();
    return;
}
void FileUtils::getTxtContent(QString &path, QString &textcontent)
{
QFile file(path);
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
return;
QByteArray encodedString = file.read(20480000);
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)

View File

@ -39,6 +39,7 @@
//#define INITIAL_STATE 0
//#define CREATING_INDEX 1
//#define FINISH_CREATING_INDEX 2
#define MAX_CONTENT_LENGTH 20480000
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
@ -64,6 +65,7 @@ public:
//parse text,docx.....
static QMimeType getMimetype(QString &path);
static void getDocxTextContent(QString &path, QString &textcontent);
static void getPptxTextContent(QString &path, QString &textcontent);
static void getTxtContent(QString &path, QString &textcontent);
static size_t _max_index_count;
static size_t _current_index_count; //this one has been Abandoned,do not use it.

View File

@ -36,6 +36,8 @@ void FileReader::getTextContent(QString path, QString &textContent)
{
if(strsfx.endsWith( "docx"))
FileUtils::getDocxTextContent(path,textContent);
if(strsfx.endsWith( "pptx"))
FileUtils::getPptxTextContent(path,textContent);
}
else if(name == "text/plain")
{

View File

@ -67,9 +67,9 @@ private:
std::map<QString, bool>::value_type("doc", true),
std::map<QString, bool>::value_type("docx", true),
std::map<QString, bool>::value_type("ppt", true),
// std::map<QString, bool>::value_type(".pptx", true),
std::map<QString, bool>::value_type("pptx", true),
std::map<QString, bool>::value_type("xls", true),
// std::map<QString, bool>::value_type(".xlsx", true),
// std::map<QString, bool>::value_type("xlsx", true),
std::map<QString, bool>::value_type("txt", true),
std::map<QString, bool>::value_type("dot", true),
std::map<QString, bool>::value_type("wps", true),

View File

@ -65,9 +65,9 @@ private:
std::map<QString, bool>::value_type("doc", true),
std::map<QString, bool>::value_type("docx", true),
std::map<QString, bool>::value_type("ppt", true),
// std::map<QString, bool>::value_type(".pptx", true),
std::map<QString, bool>::value_type("pptx", true),
std::map<QString, bool>::value_type("xls", true),
// std::map<QString, bool>::value_type(".xlsx", true),
// std::map<QString, bool>::value_type("xlsx", true),
std::map<QString, bool>::value_type("txt", true),
std::map<QString, bool>::value_type("dot", true),
std::map<QString, bool>::value_type("wps", true),