forked from openkylin/ukui-search
Add support for 'pptx'.
This commit is contained in:
parent
606a436f48
commit
f1485deeac
|
@ -502,6 +502,7 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi)
|
|||
*/
|
||||
void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
||||
{
|
||||
//FIXME: could this traversal be optimized (e.g. with an XPath-style query)?
|
||||
QFileInfo info = QFileInfo(path);
|
||||
if(!info.exists()||info.isDir())
|
||||
return;
|
||||
|
@ -517,6 +518,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
|
||||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
QDomElement first = doc.firstChildElement("w:document");
|
||||
QDomElement body = first.firstChildElement("w:body");
|
||||
while(!body.isNull())
|
||||
|
@ -529,7 +531,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
{
|
||||
QDomElement wt = wr.firstChildElement("w:t");
|
||||
textcontent.append(wt.text().replace("\n",""));
|
||||
if(textcontent.length() >= 682666) //20480000/3
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
return;
|
||||
|
@ -544,13 +546,86 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
return;
|
||||
}
|
||||
|
||||
void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
|
||||
{
|
||||
//fix me :optimized by xpath??
|
||||
QFileInfo info = QFileInfo(path);
|
||||
if(!info.exists()||info.isDir())
|
||||
return;
|
||||
QuaZip file(path);
|
||||
if(!file.open(QuaZip::mdUnzip))
|
||||
return;
|
||||
QString prefix("ppt/slides/slide");
|
||||
QStringList fileList;
|
||||
for(QString i : file.getFileNameList())
|
||||
{
|
||||
if(i.startsWith(prefix))
|
||||
fileList<<i;
|
||||
}
|
||||
if(fileList.isEmpty())
|
||||
return;
|
||||
QDomElement sptree;
|
||||
QDomElement sp;
|
||||
QDomElement txbody;
|
||||
QDomElement ap;
|
||||
QDomElement ar;
|
||||
QDomElement at;
|
||||
for(int i =0;i<fileList.size();++i)
|
||||
{
|
||||
QString name = prefix + QString::number(i+1) + ".xml";
|
||||
if(!file.setCurrentFile(name))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
QuaZipFile fileR(&file);
|
||||
fileR.open(QIODevice::ReadOnly);
|
||||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
|
||||
while(!sptree.isNull())
|
||||
{
|
||||
sp= sptree.firstChildElement("p:sp");
|
||||
while(!sp.isNull())
|
||||
{
|
||||
txbody= sp.firstChildElement("p:txBody");
|
||||
while(!txbody.isNull())
|
||||
{
|
||||
ap = txbody.firstChildElement("a:p");
|
||||
while(!ap.isNull())
|
||||
{
|
||||
ar = ap.firstChildElement("a:r");
|
||||
while(!ar.isNull())
|
||||
{
|
||||
at = ar.firstChildElement("a:t");
|
||||
textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
ar = ar.nextSiblingElement();
|
||||
}
|
||||
ap = ap.nextSiblingElement();
|
||||
}
|
||||
txbody = txbody.nextSiblingElement();
|
||||
}
|
||||
sp = sp.nextSiblingElement();
|
||||
}
|
||||
sptree = sptree.nextSiblingElement();
|
||||
}
|
||||
}
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
|
||||
void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
||||
{
|
||||
QFile file(path);
|
||||
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
|
||||
return;
|
||||
|
||||
QByteArray encodedString = file.read(20480000);
|
||||
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
|
||||
|
||||
uchardet_t chardet = uchardet_new();
|
||||
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
//#define FINISH_CREATING_INDEX 2
|
||||
#define MAX_CONTENT_LENGTH 20480000
|
||||
|
||||
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
|
||||
|
||||
|
@ -64,6 +65,7 @@ public:
|
|||
//parse text,docx.....
|
||||
static QMimeType getMimetype(QString &path);
|
||||
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||
static void getPptxTextContent(QString &path, QString &textcontent);
|
||||
static void getTxtContent(QString &path, QString &textcontent);
|
||||
static size_t _max_index_count;
|
||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||
|
|
|
@ -36,6 +36,8 @@ void FileReader::getTextContent(QString path, QString &textContent)
|
|||
{
|
||||
if(strsfx.endsWith( "docx"))
|
||||
FileUtils::getDocxTextContent(path,textContent);
|
||||
if(strsfx.endsWith( "pptx"))
|
||||
FileUtils::getPptxTextContent(path,textContent);
|
||||
}
|
||||
else if(name == "text/plain")
|
||||
{
|
||||
|
|
|
@ -67,9 +67,9 @@ private:
|
|||
std::map<QString, bool>::value_type("doc", true),
|
||||
std::map<QString, bool>::value_type("docx", true),
|
||||
std::map<QString, bool>::value_type("ppt", true),
|
||||
// std::map<QString, bool>::value_type(".pptx", true),
|
||||
std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("xls", true),
|
||||
// std::map<QString, bool>::value_type(".xlsx", true),
|
||||
// std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("txt", true),
|
||||
std::map<QString, bool>::value_type("dot", true),
|
||||
std::map<QString, bool>::value_type("wps", true),
|
||||
|
|
|
@ -65,9 +65,9 @@ private:
|
|||
std::map<QString, bool>::value_type("doc", true),
|
||||
std::map<QString, bool>::value_type("docx", true),
|
||||
std::map<QString, bool>::value_type("ppt", true),
|
||||
// std::map<QString, bool>::value_type(".pptx", true),
|
||||
std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("xls", true),
|
||||
// std::map<QString, bool>::value_type(".xlsx", true),
|
||||
// std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("txt", true),
|
||||
std::map<QString, bool>::value_type("dot", true),
|
||||
std::map<QString, bool>::value_type("wps", true),
|
||||
|
|
Loading…
Reference in New Issue