ukui-search/libsearch/file-utils.cpp

691 lines
23 KiB
C++
Raw Normal View History

2021-01-29 11:43:07 +08:00
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangjiaping <zhangjiaping@kylinos.cn>
*
*/
2020-12-21 18:50:54 +08:00
#include "file-utils.h"

#include <memory>
2021-01-10 09:23:02 +08:00
size_t FileUtils::_max_index_count = 0;
size_t FileUtils::_current_index_count = 0;
unsigned short FileUtils::_index_status = 0;
2021-04-16 15:35:54 +08:00
FileUtils::SearchMethod FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH;
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
2020-12-21 18:50:54 +08:00
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::FileUtils
 * Default constructor; it performs no work of its own.
 */
FileUtils::FileUtils() = default;
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::makeDocUterm
 * Builds a term from a path by hashing the path's UTF-8 bytes with MD5.
 * @param path the string to hash
 * @return the MD5 digest as a hexadecimal std::string
 */
std::string FileUtils::makeDocUterm(QString path) {
    const QByteArray utf8Path = path.toUtf8();
    const QByteArray digest = QCryptographicHash::hash(utf8Path, QCryptographicHash::Md5);
    return digest.toHex().toStdString();
}
/**
* @brief FileUtils::getFileIcon
* @param uri "file:///home/xxx/xxx/xxxx.txt"
* @param checkValid
* @return
*/
2021-04-26 15:06:47 +08:00
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid) {
auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
2021-04-26 15:06:47 +08:00
G_FILE_ATTRIBUTE_STANDARD_ICON,
G_FILE_QUERY_INFO_NONE,
nullptr,
nullptr));
if(!G_IS_FILE_INFO(info.get()->get()))
return QIcon::fromTheme("unknown");
2021-04-26 15:06:47 +08:00
GIcon *g_icon = g_file_info_get_icon(info.get()->get());
QString icon_name;
//do not unref the GIcon from info.
2021-04-26 15:06:47 +08:00
if(G_IS_ICON(g_icon)) {
const gchar* const* icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
if(icon_names) {
auto p = icon_names;
2021-04-26 15:06:47 +08:00
if(*p)
icon_name = QString(*p);
if(checkValid) {
while(*p) {
QIcon icon = QIcon::fromTheme(*p);
2021-04-26 15:06:47 +08:00
if(!icon.isNull()) {
icon_name = QString(*p);
break;
} else {
p++;
}
}
}
}
}
2021-04-26 15:06:47 +08:00
if(QIcon::fromTheme(icon_name).isNull()) {
return QIcon::fromTheme("unknown");
}
return QIcon::fromTheme(icon_name);
}
/**
 * @brief FileUtils::getAppIcon
 * Reads the Icon key of a .desktop file and resolves it through the icon theme.
 * @param path full path of the .desktop file
 * @return the themed icon; the "unknown" theme icon when the file cannot be loaded;
 *         the bundled ":/res/icons/desktop.png" when the theme has no such icon
 */
QIcon FileUtils::getAppIcon(const QString &path) {
    const QByteArray ba = path.toUtf8();
    GKeyFile *keyfile = g_key_file_new();
    if(!g_key_file_load_from_file(keyfile, ba.constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return QIcon::fromTheme("unknown");
    }
    // g_key_file_get_locale_string() returns a newly allocated string (or NULL) that the
    // caller owns: copy it into a QString, then release it.
    gchar *rawIcon = g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_ICON, nullptr, nullptr);
    const QString iconName = QString(rawIcon);
    g_free(rawIcon);
    g_key_file_free(keyfile);

    const QIcon icon = QIcon::fromTheme(iconName);
    if(icon.isNull()) {
        return QIcon(":/res/icons/desktop.png");
    }
    return icon;
}
/**
 * @brief FileUtils::getSettingIcon
 * Picks the control-center menu icon for a setting entry.
 * @param setting About/About->Properties; the part before the first "/" selects the icon
 * @param is_white when true the "<Name>White.svg" variant is used, otherwise "<Name>.svg"
 * @return the icon file when it exists, otherwise the "ukui-control-center" theme icon
 */
QIcon FileUtils::getSettingIcon(const QString& setting, const bool& is_white) {
    QString name = setting.left(setting.indexOf("/"));
    if(!name.isEmpty()) {
        // Capitalize the leading character only. A string replace() would also rewrite
        // every later occurrence of that character.
        name[0] = name.at(0).toUpper();
    }
    const QString pattern = is_white
                            ? QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg")
                            : QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg");
    const QString path = pattern.arg(name);
    if(QFile::exists(path)) {
        return QIcon(path);
    }
    // No plugin icon available: fall back to the control-center application icon.
    return QIcon::fromTheme("ukui-control-center");
}
/**
 * @brief FileUtils::getFileName
 * Returns the file-name component of an existing file.
 * @param uri "file:///home/xxx/xxx/xxxx.txt"
 *        NOTE(review): the value is passed straight to QFileInfo - confirm that callers
 *        hand in something QFileInfo can resolve.
 * @return the file name, or "Unknown File" when QFileInfo reports the file does not exist
 */
QString FileUtils::getFileName(const QString& uri) {
    const QFileInfo fileInfo(uri);
    if(!fileInfo.exists()) {
        return "Unknown File";
    }
    return fileInfo.fileName();
    // An earlier variant derived the name via QUrl::fileName() without checking existence.
}
/**
 * @brief FileUtils::getAppName
 * Reads the localized Name key of a .desktop file.
 * @param path full path of the .desktop file
 * @return the application name; "Unknown App" when the file cannot be loaded;
 *         a null QString when the key is missing
 */
QString FileUtils::getAppName(const QString& path) {
    const QByteArray ba = path.toUtf8();
    GKeyFile *keyfile = g_key_file_new();
    if(!g_key_file_load_from_file(keyfile, ba.constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return "Unknown App";
    }
    // The returned string is newly allocated and owned by the caller: copy, then free.
    gchar *rawName = g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_NAME, nullptr, nullptr);
    const QString name = QString(rawName);
    g_free(rawName);
    g_key_file_free(keyfile);
    return name;
}
/**
 * @brief FileUtils::getSettingName
 * Extracts the part of a setting path that follows its last "/".
 * @param setting About/About->Properties
 * @return the text after the last "/"; the whole string when it contains no "/"
 */
QString FileUtils::getSettingName(const QString& setting) {
    const int lastSlash = setting.lastIndexOf("/");
    // lastSlash is -1 when there is no "/", which makes tailLength the full length.
    const int tailLength = setting.length() - lastSlash - 1;
    return setting.right(tailLength);
}
2021-04-26 15:06:47 +08:00
void FileUtils::loadHanziTable(const QString &fileName) {
QFile file(fileName);
2021-04-26 15:06:47 +08:00
if(!file.open(QFile::ReadOnly | QFile::Text)) {
qDebug("File: '%s' open failed!", file.fileName().toStdString().c_str());
return;
}
/* 读取汉字对照表文件并转换为QMap存储 */
while(!file.atEnd()) {
QString content = QString::fromUtf8(file.readLine());
FileUtils::map_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ").first().split(",");
}
file.close();
return;
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::getMimetype
 * Determines the MIME type of a file using QMimeDatabase::MatchContent.
 * @param path path of the file to inspect
 * @return the QMimeType reported by QMimeDatabase
 */
QMimeType FileUtils::getMimetype(QString &path) {
    const QMimeDatabase mimeDb;
    return mimeDb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
}
//aborted
2021-04-26 15:06:47 +08:00
QString FileUtils::find(const QString &hanzi) {
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
// static QMap<QString, QStringList> map;
QString output;
QStringList stringList = hanzi.split("");
/* 遍历查找汉字-拼音对照表的内容并将汉字替换为拼音 */
2021-04-26 15:06:47 +08:00
for(const QString &str : stringList) {
if(FileUtils::map_chinese2pinyin.contains(str))
output += FileUtils::map_chinese2pinyin[str].first();
else
output += str;
}
return output;
}
//DFS多音字太多直接GG
2021-04-26 15:06:47 +08:00
void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList) {
if(hanzi.size() == 0) {
2020-12-29 19:30:48 +08:00
resultList.append(resultAllPinYin);
resultList.append(resultFirst);
return;
}
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(hanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]) {
2020-12-29 19:30:48 +08:00
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + i, resultFirst + i.at(0), resultList);
}
2021-04-26 15:06:47 +08:00
} else {
2020-12-29 19:30:48 +08:00
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + hanzi.at(0), resultFirst + hanzi.at(0), resultList);
}
}
//BFS+Stack多音字太多会爆栈
2021-04-26 15:06:47 +08:00
void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString> tempQueue;
tempHanzi = hanzi;
int tempQueueSize = 0;
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(i);
}
2021-04-26 15:06:47 +08:00
} else {
tempQueue.enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
2021-04-26 15:06:47 +08:00
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue.size();
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(tempQueue.head() + i);
}
tempQueue.dequeue();
}
2021-04-26 15:06:47 +08:00
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
tempQueue.dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
2021-04-26 15:06:47 +08:00
while(!tempQueue.empty()) {
resultList.append(tempQueue.dequeue());
}
}
//BFS+Heap多音字太多会耗尽内存
2021-04-26 15:06:47 +08:00
void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(i);
}
2021-04-26 15:06:47 +08:00
} else {
tempQueue->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
2021-04-26 15:06:47 +08:00
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(tempQueue->head() + i);
}
tempQueue->dequeue();
}
2021-04-26 15:06:47 +08:00
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueue->dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
2021-04-26 15:06:47 +08:00
while(!tempQueue->empty()) {
resultList.append(tempQueue->dequeue());
}
delete tempQueue;
tempQueue = nullptr;
}
//BFS+Heap+超过3个多音字只建一个索引比较折中的方案
2021-04-26 15:06:47 +08:00
void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
int multiToneWordNum = 0;
2021-04-26 15:06:47 +08:00
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
++multiToneWordNum;
}
}
}
2021-04-26 15:06:47 +08:00
if(multiToneWordNum > 3) {
QString oneResult, oneResultFirst;
2021-04-26 15:06:47 +08:00
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
oneResult += FileUtils::map_chinese2pinyin[i].first();
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
2021-04-26 15:06:47 +08:00
} else {
oneResult += i;
oneResultFirst += i;
}
}
resultList.append(oneResult);
resultList.append(oneResultFirst);
return;
}
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(i);
tempQueueFirst->enqueue(i.at(0));
}
2021-04-26 15:06:47 +08:00
} else {
tempQueue->enqueue(tempHanzi.at(0));
tempQueueFirst->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
2021-04-26 15:06:47 +08:00
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(tempQueue->head() + i);
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
}
tempQueue->dequeue();
tempQueueFirst->dequeue();
}
2021-04-26 15:06:47 +08:00
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
tempQueue->dequeue();
tempQueueFirst->dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
2021-04-26 15:06:47 +08:00
while(!tempQueue->empty()) {
resultList.append(tempQueue->dequeue());
resultList.append(tempQueueFirst->dequeue());
}
delete tempQueue;
delete tempQueueFirst;
tempQueue = nullptr;
tempQueueFirst = nullptr;
return;
}
//BFS+Stack+超过3个多音字只建一个索引比较折中的方案
2021-04-26 15:06:47 +08:00
void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString> tempQueue;
QQueue<QString> tempQueueFirst;
tempHanzi = hanzi;
int tempQueueSize = 0;
int multiToneWordNum = 0;
2021-04-26 15:06:47 +08:00
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
++multiToneWordNum;
}
}
}
2021-04-26 15:06:47 +08:00
if(multiToneWordNum > 3) {
QString oneResult, oneResultFirst;
2021-04-26 15:06:47 +08:00
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
oneResult += FileUtils::map_chinese2pinyin[i].first();
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
2021-04-26 15:06:47 +08:00
} else {
oneResult += i;
oneResultFirst += i;
}
}
resultList.append(oneResult);
resultList.append(oneResultFirst);
return;
}
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(i);
tempQueueFirst.enqueue(i.at(0));
}
2021-04-26 15:06:47 +08:00
} else {
tempQueue.enqueue(tempHanzi.at(0));
tempQueueFirst.enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
2021-04-26 15:06:47 +08:00
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue.size();
2021-04-26 15:06:47 +08:00
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(tempQueue.head() + i);
tempQueueFirst.enqueue(tempQueueFirst.head() + i.at(0));
}
tempQueue.dequeue();
tempQueueFirst.dequeue();
}
2021-04-26 15:06:47 +08:00
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
tempQueueFirst.enqueue(tempQueueFirst.head() + tempHanzi.at(0));
tempQueue.dequeue();
tempQueueFirst.dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
2021-04-26 15:06:47 +08:00
while(!tempQueue.empty()) {
resultList.append(tempQueue.dequeue());
resultList.append(tempQueueFirst.dequeue());
}
// delete tempQueue;
// delete tempQueueFirst;
// tempQueue = nullptr;
// tempQueueFirst = nullptr;
return;
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::findMultiToneWords
 * Produces the pinyin readings of a string via stitchMultiToneWordsBFSStackLess3().
 * @param hanzi text to convert
 * @return alternating full-pinyin / initials strings, one pair per reading; an empty list
 *         for empty input
 */
QStringList FileUtils::findMultiToneWords(const QString& hanzi) {
    QStringList output;
    // Guard here so the helper is never asked to index into an empty string.
    if(hanzi.isEmpty())
        return output;
    stitchMultiToneWordsBFSStackLess3(hanzi, output);
    return output;
}
2020-12-29 20:18:36 +08:00
/**
* @brief FileUtils::getDocxTextContent
* @param path: abs path
* @return docx to QString
*/
2021-04-26 15:06:47 +08:00
void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
2021-04-08 16:11:58 +08:00
//fix me :optimized by xpath??
2020-12-29 20:18:36 +08:00
QFileInfo info = QFileInfo(path);
2021-04-26 15:06:47 +08:00
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
2020-12-29 20:18:36 +08:00
2021-04-26 15:06:47 +08:00
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
return;
2020-12-29 20:18:36 +08:00
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开
QDomDocument doc;
doc.setContent(fileR.readAll());
2021-04-08 16:11:58 +08:00
fileR.close();
2020-12-29 20:18:36 +08:00
QDomElement first = doc.firstChildElement("w:document");
QDomElement body = first.firstChildElement("w:body");
2021-04-26 15:06:47 +08:00
while(!body.isNull()) {
QDomElement wp = body.firstChildElement("w:p");
while(!wp.isNull()) {
QDomElement wr = wp.firstChildElement("w:r");
while(!wr.isNull()) {
QDomElement wt = wr.firstChildElement("w:t");
2021-04-26 15:06:47 +08:00
textcontent.append(wt.text().replace("\n", ""));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
wr = wr.nextSiblingElement();
}
wp = wp.nextSiblingElement();
2020-12-29 20:18:36 +08:00
}
body = body.nextSiblingElement();
2020-12-29 20:18:36 +08:00
}
file.close();
return;
2020-12-29 20:18:36 +08:00
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::getPptxTextContent
 * Appends the text of a .pptx file to textcontent. Slides are read as
 * "ppt/slides/slide<N>.xml" for N = 1..(number of archive entries starting with
 * "ppt/slides/slide"); a missing or unreadable slide is skipped. For every slide the walk
 * is p:sld > p:cSld > p:spTree > p:sp > p:txBody > a:p > a:r, taking the first a:t of each
 * run with "\r" and "\t" removed. Stops once textcontent reaches MAX_CONTENT_LENGTH / 3
 * characters.
 * NOTE(review): slides whose number is larger than the slide count (gaps in the numbering)
 * are never visited - confirm whether that matters for real-world files.
 * @param path: abs path
 * @param textcontent receives the extracted text
 */
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
    const QFileInfo info(path);
    if(!info.exists() || info.isDir())
        return;
    QuaZip file(path);
    if(!file.open(QuaZip::mdUnzip))
        return;

    const QString prefix("ppt/slides/slide");
    int slideCount = 0;
    const QStringList entries = file.getFileNameList();
    for(const QString &entry : entries) {
        if(entry.startsWith(prefix))
            ++slideCount;
    }
    if(slideCount == 0) {
        file.close();
        return;
    }

    QDomDocument doc;
    for(int i = 0; i < slideCount; ++i) {
        const QString name = prefix + QString::number(i + 1) + ".xml";
        if(!file.setCurrentFile(name)) {
            continue;
        }
        QuaZipFile fileR(&file);
        if(!fileR.open(QIODevice::ReadOnly)) {
            continue;
        }
        doc.clear();
        doc.setContent(fileR.readAll());
        fileR.close();

        //fix me :optimized by xpath??
        // Collecting every "a:t" with elementsByTagName() would read better, but the original
        // author found it slower on large slides with many irrelevant nodes, hence the
        // explicit walk below.
        for(QDomElement sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
                !sptree.isNull(); sptree = sptree.nextSiblingElement()) {
            for(QDomElement sp = sptree.firstChildElement("p:sp"); !sp.isNull(); sp = sp.nextSiblingElement()) {
                for(QDomElement txbody = sp.firstChildElement("p:txBody"); !txbody.isNull(); txbody = txbody.nextSiblingElement()) {
                    for(QDomElement ap = txbody.firstChildElement("a:p"); !ap.isNull(); ap = ap.nextSiblingElement()) {
                        for(QDomElement ar = ap.firstChildElement("a:r"); !ar.isNull(); ar = ar.nextSiblingElement()) {
                            const QDomElement at = ar.firstChildElement("a:t");
                            // Clean the run text itself. Calling replace() on the result of
                            // append() would rescan the whole accumulated text for every run.
                            textcontent.append(at.text().replace("\r", "").replace("\t", ""));
                            if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
                                file.close();
                                return;
                            }
                        }
                    }
                }
            }
        }
    }
    file.close();
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::getXlsxTextContent
 * Appends the shared strings of an .xlsx file (xl/sharedStrings.xml inside the zip) to
 * textcontent. For every sst > si entry the text comes from the first "t" of the first "r"
 * child, or from the entry's own "t" when it has no "r"; "\r" and "\n" are removed.
 * Entries without a "t" are skipped. Stops once textcontent reaches MAX_CONTENT_LENGTH / 3
 * characters.
 * @param path: abs path
 * @param textcontent receives the extracted text
 */
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
    const QFileInfo info(path);
    if(!info.exists() || info.isDir())
        return;
    QuaZip file(path);
    if(!file.open(QuaZip::mdUnzip))
        return;

    if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
        file.close();
        return;
    }
    QuaZipFile fileR(&file);
    // Open read-only; bail out instead of parsing an unreadable entry.
    if(!fileR.open(QIODevice::ReadOnly)) {
        file.close();
        return;
    }
    QDomDocument doc;
    doc.setContent(fileR.readAll());
    fileR.close();
    // The document is fully parsed in memory, so the archive is no longer needed.
    file.close();

    for(QDomElement sst = doc.firstChildElement("sst"); !sst.isNull(); sst = sst.nextSiblingElement()) {
        // The advance lives in the for-header, so the `continue` below cannot skip it.
        // Skipping it would spin forever on the first entry that has no "t".
        for(QDomElement si = sst.firstChildElement("si"); !si.isNull(); si = si.nextSiblingElement()) {
            const QDomElement r = si.firstChildElement("r");
            const QDomElement t = r.isNull() ? si.firstChildElement("t") : r.firstChildElement("t");
            if(t.isNull())
                continue;
            textcontent.append(t.text().replace("\r", "").replace("\n", ""));
            if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
                return;
            }
        }
    }
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::getPdfTextContent
 * Appends the text of every page of a PDF to textcontent, with "\n" removed. Stops once
 * textcontent reaches MAX_CONTENT_LENGTH / 3 characters.
 * @param path: path of the PDF file
 * @param textcontent receives the extracted text; left untouched when the document cannot
 *        be loaded or is locked
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
    // load() yields no document on failure, and the caller owns the document it returns.
    // The smart pointer releases it on every exit path, including the locked case.
    std::unique_ptr<Poppler::Document> doc(Poppler::Document::load(path));
    if(!doc || doc->isLocked())
        return;

    const QRectF qf; // null rectangle: take the text of the whole page
    const int pageNum = doc->numPages();
    for(int i = 0; i < pageNum; ++i) {
        // Each page object is owned by the caller too and must be released.
        std::unique_ptr<Poppler::Page> page(doc->page(i));
        if(!page)
            continue;
        textcontent.append(page->text(qf).replace("\n", ""));
        if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
            break;
    }
}
2021-04-26 15:06:47 +08:00
/**
 * @brief FileUtils::getTxtContent
 * Reads up to MAX_CONTENT_LENGTH bytes of a text file, detects their encoding with
 * uchardet, decodes them and stores the result, with "\n" removed, in textcontent.
 * When the detected charset has no matching QTextCodec, a warning is logged and the
 * QTextStream's default codec is used.
 * @param path: path of the text file
 * @param textcontent receives the decoded text; left untouched when the file cannot be
 *        opened, cleared when the file is empty
 */
void FileUtils::getTxtContent(QString &path, QString &textcontent) {
    QFile file(path);
    if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
        return;
    const QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
    file.close();

    // Nothing to detect or decode; this also avoids a misleading "unsupported encoding" warning.
    if(encodedString.isEmpty()) {
        textcontent.clear();
        return;
    }

    uchardet_t chardet = uchardet_new();
    if(uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0)
        qWarning() << "Txt file encoding format detect fail!" << path;
    uchardet_data_end(chardet);

    // The charset name belongs to the detector, so resolve the codec (and log) before
    // the detector is deleted.
    const char *charsetName = uchardet_get_charset(chardet);
    QTextCodec *codec = QTextCodec::codecForName(charsetName);
    if(!codec)
        qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(charsetName);
    uchardet_delete(chardet);

    QTextStream stream(encodedString, QIODevice::ReadOnly);
    if(codec)
        stream.setCodec(codec);
    textcontent = stream.readAll().replace("\n", "");
}