Merge branch '0330-dev' into 'main'
Add support for 'pptx'. See merge request kylin-desktop/ukui-search!11
This commit is contained in:
commit
c069f96fa9
|
@ -502,6 +502,7 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi)
|
|||
*/
|
||||
void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
||||
{
|
||||
//fix me :optimized by xpath??
|
||||
QFileInfo info = QFileInfo(path);
|
||||
if(!info.exists()||info.isDir())
|
||||
return;
|
||||
|
@ -517,6 +518,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
|
||||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
QDomElement first = doc.firstChildElement("w:document");
|
||||
QDomElement body = first.firstChildElement("w:body");
|
||||
while(!body.isNull())
|
||||
|
@ -529,7 +531,7 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
{
|
||||
QDomElement wt = wr.firstChildElement("w:t");
|
||||
textcontent.append(wt.text().replace("\n",""));
|
||||
if(textcontent.length() >= 682666) //20480000/3
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
return;
|
||||
|
@ -544,13 +546,159 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
|||
return;
|
||||
}
|
||||
|
||||
void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
|
||||
{
|
||||
QFileInfo info = QFileInfo(path);
|
||||
if(!info.exists()||info.isDir())
|
||||
return;
|
||||
QuaZip file(path);
|
||||
if(!file.open(QuaZip::mdUnzip))
|
||||
return;
|
||||
QString prefix("ppt/slides/slide");
|
||||
QStringList fileList;
|
||||
for(QString i : file.getFileNameList())
|
||||
{
|
||||
if(i.startsWith(prefix))
|
||||
fileList<<i;
|
||||
}
|
||||
if(fileList.isEmpty())
|
||||
return;
|
||||
QDomElement sptree;
|
||||
QDomElement sp;
|
||||
QDomElement txbody;
|
||||
QDomElement ap;
|
||||
QDomElement ar;
|
||||
QDomDocument doc;
|
||||
QDomElement at;
|
||||
// QDomNodeList atList;
|
||||
for(int i =0;i<fileList.size();++i)
|
||||
{
|
||||
QString name = prefix + QString::number(i+1) + ".xml";
|
||||
if(!file.setCurrentFile(name))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
QuaZipFile fileR(&file);
|
||||
fileR.open(QIODevice::ReadOnly);
|
||||
doc.clear();
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
|
||||
//fix me :optimized by xpath??
|
||||
//This method looks better but slower,
|
||||
//If xml file is very large with many useless node,this method will take a lot of time.
|
||||
|
||||
// atList = doc.elementsByTagName("a:t");
|
||||
// for(int i = 0; i<atList.size(); ++i)
|
||||
// {
|
||||
// at = atList.at(i).toElement();
|
||||
// if(!at.isNull())
|
||||
// {
|
||||
// textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
||||
// if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
// {
|
||||
// file.close();
|
||||
// return;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//This is ugly but seems more efficient when handel a large file.
|
||||
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
|
||||
while(!sptree.isNull())
|
||||
{
|
||||
sp= sptree.firstChildElement("p:sp");
|
||||
while(!sp.isNull())
|
||||
{
|
||||
txbody= sp.firstChildElement("p:txBody");
|
||||
while(!txbody.isNull())
|
||||
{
|
||||
ap = txbody.firstChildElement("a:p");
|
||||
while(!ap.isNull())
|
||||
{
|
||||
ar = ap.firstChildElement("a:r");
|
||||
while(!ar.isNull())
|
||||
{
|
||||
at = ar.firstChildElement("a:t");
|
||||
textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
||||
{
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
ar = ar.nextSiblingElement();
|
||||
}
|
||||
ap = ap.nextSiblingElement();
|
||||
}
|
||||
txbody = txbody.nextSiblingElement();
|
||||
}
|
||||
sp = sp.nextSiblingElement();
|
||||
}
|
||||
sptree = sptree.nextSiblingElement();
|
||||
}
|
||||
}
|
||||
file.close();
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Extracts the cell strings of an Excel (.xlsx) file.
 *
 * Only the shared string table ("xl/sharedStrings.xml") is read. For every
 * child element of the root <sst> element the text is taken from
 *   - the direct <t> child when the item has no <r> runs, or
 *   - the <t> child of every <r> run when the item is rich text.
 * Items without any such text node are skipped.
 *
 * Carriage returns are removed and line feeds are turned into spaces in each
 * fragment before it is appended. No separator is inserted between items.
 *
 * Nothing is appended when path does not exist, is a directory, cannot be
 * opened as a zip archive, or has no readable shared string table.
 *
 * @param path        path of the .xlsx file.
 * @param textcontent output buffer; extracted text is appended to it. Reading
 *                    stops once it holds at least MAX_CONTENT_LENGTH/3
 *                    characters.
 */
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
{
    const QFileInfo info(path);
    if(!info.exists() || info.isDir())
        return;

    QuaZip file(path);
    if(!file.open(QuaZip::mdUnzip))
        return;

    if(!file.setCurrentFile("xl/sharedStrings.xml",QuaZip::csSensitive))
    {
        file.close();
        return;
    }

    QuaZipFile fileR(&file);
    if(!fileR.open(QIODevice::ReadOnly))
    {
        file.close();
        return;
    }
    const QByteArray xml = fileR.readAll();
    fileR.close();
    // Everything needed is in memory now, so the archive can be released
    // before the (potentially long) DOM walk.
    file.close();

    QDomDocument doc;
    // The result is deliberately not checked: if parsing fails, the walk below
    // simply visits whatever nodes the document ended up with (possibly none).
    doc.setContent(xml);

    const QDomElement sst = doc.firstChildElement("sst");
    // The loop step always advances to the next sibling, so an item without
    // text can never stall the traversal.
    for(QDomElement si = sst.firstChildElement("si"); !si.isNull(); si = si.nextSiblingElement())
    {
        QDomElement run = si.firstChildElement("r");
        if(run.isNull())
        {
            // Plain string item: <si><t>text</t></si>
            const QDomElement t = si.firstChildElement("t");
            if(!t.isNull())
                textcontent.append(t.text().replace("\r","").replace("\n"," "));
            if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
                return;
            continue;
        }

        // Rich text item: <si><r><t>a</t></r><r><t>b</t></r></si>
        for(; !run.isNull(); run = run.nextSiblingElement("r"))
        {
            const QDomElement t = run.firstChildElement("t");
            if(!t.isNull())
                textcontent.append(t.text().replace("\r","").replace("\n"," "));
            if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
                return;
        }
    }
    return;
}
|
||||
|
||||
void FileUtils::getTxtContent(QString &path, QString &textcontent)
|
||||
{
|
||||
QFile file(path);
|
||||
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
|
||||
return;
|
||||
|
||||
QByteArray encodedString = file.read(20480000);
|
||||
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
|
||||
|
||||
uchardet_t chardet = uchardet_new();
|
||||
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
//#define INITIAL_STATE 0
|
||||
//#define CREATING_INDEX 1
|
||||
//#define FINISH_CREATING_INDEX 2
|
||||
#define MAX_CONTENT_LENGTH 20480000
|
||||
|
||||
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
|
||||
|
||||
|
@ -64,6 +65,8 @@ public:
|
|||
//parse text,docx.....
|
||||
static QMimeType getMimetype(QString &path);
|
||||
static void getDocxTextContent(QString &path, QString &textcontent);
|
||||
static void getPptxTextContent(QString &path, QString &textcontent);
|
||||
static void getXlsxTextContent(QString &path, QString &textcontent);
|
||||
static void getTxtContent(QString &path, QString &textcontent);
|
||||
static size_t _max_index_count;
|
||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||
|
|
|
@ -36,6 +36,10 @@ void FileReader::getTextContent(QString path, QString &textContent)
|
|||
{
|
||||
if(strsfx.endsWith( "docx"))
|
||||
FileUtils::getDocxTextContent(path,textContent);
|
||||
if(strsfx.endsWith( "pptx"))
|
||||
FileUtils::getPptxTextContent(path,textContent);
|
||||
if(strsfx.endsWith( "xlsx"))
|
||||
FileUtils::getXlsxTextContent(path,textContent);
|
||||
}
|
||||
else if(name == "text/plain")
|
||||
{
|
||||
|
|
|
@ -67,9 +67,9 @@ private:
|
|||
std::map<QString, bool>::value_type("doc", true),
|
||||
std::map<QString, bool>::value_type("docx", true),
|
||||
std::map<QString, bool>::value_type("ppt", true),
|
||||
// std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("xls", true),
|
||||
// std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("txt", true),
|
||||
std::map<QString, bool>::value_type("dot", true),
|
||||
std::map<QString, bool>::value_type("wps", true),
|
||||
|
|
|
@ -65,9 +65,9 @@ private:
|
|||
std::map<QString, bool>::value_type("doc", true),
|
||||
std::map<QString, bool>::value_type("docx", true),
|
||||
std::map<QString, bool>::value_type("ppt", true),
|
||||
// std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("pptx", true),
|
||||
std::map<QString, bool>::value_type("xls", true),
|
||||
// std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("xlsx", true),
|
||||
std::map<QString, bool>::value_type("txt", true),
|
||||
std::map<QString, bool>::value_type("dot", true),
|
||||
std::map<QString, bool>::value_type("wps", true),
|
||||
|
|
Loading…
Reference in New Issue