2021-01-29 11:43:07 +08:00
|
|
|
|
/*
|
|
|
|
|
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
*
|
|
|
|
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
|
|
|
|
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
|
|
|
|
|
* Modified by: zhangjiaping <zhangjiaping@kylinos.cn>
|
|
|
|
|
*
|
|
|
|
|
*/
|
2020-12-21 18:50:54 +08:00
|
|
|
|
#include "file-utils.h"
|
2021-05-28 15:55:26 +08:00
|
|
|
|
#include <QXmlStreamReader>
|
2021-08-10 17:50:50 +08:00
|
|
|
|
#include <QMutexLocker>
|
2021-09-23 10:31:13 +08:00
|
|
|
|
#include <gio/gdesktopappinfo.h>
|
2021-11-05 10:38:04 +08:00
|
|
|
|
#include <QDBusMessage>
|
|
|
|
|
#include <QDBusConnection>
|
2021-11-22 16:03:31 +08:00
|
|
|
|
#include <QDomDocument>
|
2022-05-10 13:49:54 +08:00
|
|
|
|
#include <QDBusInterface>
|
|
|
|
|
#include <QDBusReply>
|
2021-11-22 16:03:31 +08:00
|
|
|
|
#include "gobject-template.h"
|
2022-05-17 15:33:19 +08:00
|
|
|
|
#include "hanzi-to-pinyin.h"
|
2021-01-22 09:49:44 +08:00
|
|
|
|
|
2021-12-14 14:43:35 +08:00
|
|
|
|
using namespace UkuiSearch;
|
2022-03-17 15:40:55 +08:00
|
|
|
|
size_t FileUtils::maxIndexCount = 0;
|
2021-11-09 10:07:41 +08:00
|
|
|
|
unsigned short FileUtils::indexStatus = 0;
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
|
2021-08-10 17:50:50 +08:00
|
|
|
|
static QMutex iconMutex;
|
2020-12-21 18:50:54 +08:00
|
|
|
|
|
2021-04-26 15:06:47 +08:00
|
|
|
|
/// Default constructor; there is nothing to initialise.
FileUtils::FileUtils() = default;
|
|
|
|
|
|
2021-04-26 15:06:47 +08:00
|
|
|
|
/**
 * @brief FileUtils::makeDocUterm Hashes a path into a term string.
 * @param path text to hash
 * @return hex-encoded MD5 digest of the UTF-8 bytes of @p path
 */
std::string FileUtils::makeDocUterm(QString path) {
    const QByteArray utf8Path = path.toUtf8();
    const QByteArray md5 = QCryptographicHash::hash(utf8Path, QCryptographicHash::Md5);
    return md5.toHex().toStdString();
}
|
2020-12-24 11:06:19 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getFileIcon 获取文件图标
|
|
|
|
|
* @param uri "file:///home/xxx/xxx/xxxx.txt"格式
|
|
|
|
|
* @param checkValid
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2021-04-26 15:06:47 +08:00
|
|
|
|
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid) {
|
2021-08-10 17:50:50 +08:00
|
|
|
|
QMutexLocker locker(&iconMutex);
|
2020-12-24 11:06:19 +08:00
|
|
|
|
auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
|
|
|
|
|
auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
|
2021-04-26 15:06:47 +08:00
|
|
|
|
G_FILE_ATTRIBUTE_STANDARD_ICON,
|
|
|
|
|
G_FILE_QUERY_INFO_NONE,
|
|
|
|
|
nullptr,
|
|
|
|
|
nullptr));
|
|
|
|
|
if(!G_IS_FILE_INFO(info.get()->get()))
|
2021-12-16 15:59:13 +08:00
|
|
|
|
return QIcon::fromTheme("unknown",QIcon(":/res/icons/unknown.svg"));
|
2021-04-26 15:06:47 +08:00
|
|
|
|
GIcon *g_icon = g_file_info_get_icon(info.get()->get());
|
2021-09-13 21:05:49 +08:00
|
|
|
|
|
2020-12-24 11:06:19 +08:00
|
|
|
|
//do not unref the GIcon from info.
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(G_IS_ICON(g_icon)) {
|
|
|
|
|
const gchar* const* icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
|
|
|
|
|
if(icon_names) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
auto p = icon_names;
|
2021-09-13 21:05:49 +08:00
|
|
|
|
while(*p) {
|
|
|
|
|
QIcon icon = QIcon::fromTheme(*p);
|
|
|
|
|
if(!icon.isNull()) {
|
|
|
|
|
return icon;
|
|
|
|
|
} else {
|
|
|
|
|
p++;
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-16 15:59:13 +08:00
|
|
|
|
return QIcon::fromTheme("unknown",QIcon(":/res/icons/unknown.svg"));
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getAppIcon 获取应用图标
|
|
|
|
|
* @param path .desktop文件的完整路径
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QIcon FileUtils::getAppIcon(const QString &path) {
|
|
|
|
|
QByteArray ba;
|
|
|
|
|
ba = path.toUtf8();
|
|
|
|
|
GKeyFile * keyfile;
|
|
|
|
|
keyfile = g_key_file_new();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)) {
|
|
|
|
|
g_key_file_free(keyfile);
|
2021-12-16 15:59:13 +08:00
|
|
|
|
return QIcon::fromTheme("unknown",QIcon(":/res/icons/unknown.svg"));
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
QString icon = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_ICON, NULL, NULL));
|
|
|
|
|
g_key_file_free(keyfile);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(QIcon::fromTheme(icon).isNull()) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
return QIcon(":/res/icons/desktop.png");
|
|
|
|
|
}
|
|
|
|
|
return QIcon::fromTheme(icon);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getSettingIcon 获取设置图标
|
|
|
|
|
* @param setting 设置项传入参数,格式为 About/About->Properties
|
2020-12-25 19:16:44 +08:00
|
|
|
|
* @param is_white 选择是否返回白色图标
|
2020-12-24 11:06:19 +08:00
|
|
|
|
* @return
|
|
|
|
|
*/
|
2021-12-17 17:39:37 +08:00
|
|
|
|
QIcon FileUtils::getSettingIcon(const QString &setting, const bool is_white) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
QString name = setting.left(setting.indexOf("/"));
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(! name.isEmpty()) {
|
2020-12-30 17:23:03 +08:00
|
|
|
|
name.replace(QString(name.at(0)), QString(name.at(0).toUpper()));
|
|
|
|
|
}
|
2020-12-25 19:16:44 +08:00
|
|
|
|
QString path;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(is_white) {
|
2020-12-25 19:16:44 +08:00
|
|
|
|
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg(name);
|
|
|
|
|
} else {
|
|
|
|
|
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg(name);
|
|
|
|
|
}
|
2020-12-24 11:06:19 +08:00
|
|
|
|
QFile file(path);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(file.exists()) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
return QIcon(path);
|
|
|
|
|
} else {
|
2021-12-31 15:45:34 +08:00
|
|
|
|
return QIcon::fromTheme("ukui-control-center", QIcon(":/res/icons/ukui-control-center.svg")); //无插件图标时,返回控制面板应用图标
|
2021-01-14 15:19:25 +08:00
|
|
|
|
// if (is_white) {
|
|
|
|
|
// return QIcon(QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg("About"));
|
|
|
|
|
// } else {
|
|
|
|
|
// return QIcon(QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg("About"));
|
|
|
|
|
// }
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-30 17:21:06 +08:00
|
|
|
|
/**
 * @brief FileUtils::getSettingIcon Returns the control-center application icon.
 * @return the "ukui-control-center" theme icon, falling back to the bundled
 *         ":/res/icons/ukui-control-center.svg"
 */
QIcon FileUtils::getSettingIcon() {
    const QIcon bundledFallback(":/res/icons/ukui-control-center.svg");
    return QIcon::fromTheme("ukui-control-center", bundledFallback);
}
|
|
|
|
|
|
2020-12-24 11:06:19 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getFileName 获取文件名
|
2021-01-20 15:33:49 +08:00
|
|
|
|
* @param uri 格式为"file:///home/xxx/xxx/xxxx.txt"
|
2020-12-24 11:06:19 +08:00
|
|
|
|
* @return
|
|
|
|
|
*/
|
2021-12-17 17:39:37 +08:00
|
|
|
|
QString FileUtils::getFileName(const QString &uri) {
|
2021-01-20 15:33:49 +08:00
|
|
|
|
QFileInfo info(uri);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(info.exists()) {
|
2021-01-20 15:33:49 +08:00
|
|
|
|
return info.fileName();
|
|
|
|
|
} else {
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return "Unknown File";
|
|
|
|
|
}
|
2021-01-20 15:33:49 +08:00
|
|
|
|
// QUrl url = uri;
|
|
|
|
|
// if (url.fileName().isEmpty()) {
|
|
|
|
|
// return "Unknown File";
|
|
|
|
|
// }
|
|
|
|
|
// return url.fileName();
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getAppName 获取应用名
|
|
|
|
|
* @param path .destop文件的完整路径
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2021-12-17 17:39:37 +08:00
|
|
|
|
QString FileUtils::getAppName(const QString &path) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
QByteArray ba;
|
|
|
|
|
ba = path.toUtf8();
|
|
|
|
|
GKeyFile * keyfile;
|
|
|
|
|
keyfile = g_key_file_new();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)) {
|
|
|
|
|
g_key_file_free(keyfile);
|
2020-12-24 11:06:19 +08:00
|
|
|
|
return "Unknown App";
|
|
|
|
|
}
|
|
|
|
|
QString name = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_NAME, NULL, NULL));
|
|
|
|
|
g_key_file_free(keyfile);
|
|
|
|
|
return name;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getSettingName 获取设置项名
|
|
|
|
|
* @param setting 设置项传入参数,格式为 About/About->Properties
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2021-12-17 17:39:37 +08:00
|
|
|
|
QString FileUtils::getSettingName(const QString &setting) {
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
2020-12-26 12:45:28 +08:00
|
|
|
|
|
2021-07-06 16:53:32 +08:00
|
|
|
|
/**
 * @brief FileUtils::isOrUnder Tests whether one path equals or lies below another.
 *
 * Both paths get a leading "/" when they do not already start with one.
 *
 * @param pathA path to test
 * @param pathB candidate ancestor path
 * @return true when @p pathA equals @p pathB or starts with @p pathB followed
 *         by "/"; false otherwise
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    // startsWith() is safe on empty strings, whereas indexing [0] is not.
    if(!pathA.startsWith("/"))
        pathA.prepend("/");
    if(!pathB.startsWith("/"))
        pathB.prepend("/");

    // A shorter path can neither equal pathB nor be located below it.
    if(pathA.length() < pathB.length())
        return false;

    return pathA == pathB || pathA.startsWith(pathB + "/");
}
|
|
|
|
|
|
2020-12-26 12:45:28 +08:00
|
|
|
|
|
2021-04-26 15:06:47 +08:00
|
|
|
|
void FileUtils::loadHanziTable(const QString &fileName) {
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QFile file(fileName);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!file.open(QFile::ReadOnly | QFile::Text)) {
|
2020-12-26 12:45:28 +08:00
|
|
|
|
qDebug("File: '%s' open failed!", file.fileName().toStdString().c_str());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* 读取汉字对照表文件并转换为QMap存储 */
|
|
|
|
|
while(!file.atEnd()) {
|
|
|
|
|
QString content = QString::fromUtf8(file.readLine());
|
|
|
|
|
FileUtils::map_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ").first().split(",");
|
|
|
|
|
}
|
|
|
|
|
file.close();
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
/**
 * @brief FileUtils::getMimetype Determines the MIME type of a file.
 * @param path file to inspect
 * @return the MIME type reported by QMimeDatabase in MatchContent mode
 */
QMimeType FileUtils::getMimetype(const QString &path) {
    const QMimeDatabase database;
    return database.mimeTypeForFile(path, QMimeDatabase::MatchContent);
}
|
|
|
|
|
|
2021-01-03 16:58:26 +08:00
|
|
|
|
//aborted
|
2021-04-26 15:06:47 +08:00
|
|
|
|
QString FileUtils::find(const QString &hanzi) {
|
2021-01-09 11:25:07 +08:00
|
|
|
|
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
|
|
|
|
|
// static QMap<QString, QStringList> map;
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QString output;
|
|
|
|
|
QStringList stringList = hanzi.split("");
|
|
|
|
|
|
|
|
|
|
/* 遍历查找汉字-拼音对照表的内容并将汉字替换为拼音 */
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(const QString &str : stringList) {
|
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(str))
|
2020-12-26 12:45:28 +08:00
|
|
|
|
output += FileUtils::map_chinese2pinyin[str].first();
|
|
|
|
|
else
|
|
|
|
|
output += str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return output;
|
|
|
|
|
}
|
2020-12-29 16:41:30 +08:00
|
|
|
|
|
2020-12-30 15:56:23 +08:00
|
|
|
|
//DFS多音字太多直接GG
|
2021-12-17 17:39:37 +08:00
|
|
|
|
void stitchMultiToneWordsDFS(const QString &hanzi, const QString &resultAllPinYin, const QString &resultFirst, QStringList &resultList) {
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(hanzi.size() == 0) {
|
2020-12-29 19:30:48 +08:00
|
|
|
|
resultList.append(resultAllPinYin);
|
|
|
|
|
resultList.append(resultFirst);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(hanzi.at(0))) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]) {
|
2020-12-29 19:30:48 +08:00
|
|
|
|
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + i, resultFirst + i.at(0), resultList);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2020-12-29 19:30:48 +08:00
|
|
|
|
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + hanzi.at(0), resultFirst + hanzi.at(0), resultList);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-30 15:56:23 +08:00
|
|
|
|
//BFS+Stack多音字太多会爆栈
|
2021-12-17 17:39:37 +08:00
|
|
|
|
void stitchMultiToneWordsBFSStack(const QString &hanzi, QStringList &resultList) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString> tempQueue;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue.enqueue(i);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue.enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(tempHanzi.size() != 0) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueueSize = tempQueue.size();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue.enqueue(tempQueue.head() + i);
|
|
|
|
|
}
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!tempQueue.empty()) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
resultList.append(tempQueue.dequeue());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//BFS+Heap,多音字太多会耗尽内存
|
2021-12-17 17:39:37 +08:00
|
|
|
|
void stitchMultiToneWordsBFSHeap(const QString &hanzi, QStringList &resultList) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(i);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(tempHanzi.size() != 0) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueueSize = tempQueue->size();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempQueue->head() + i);
|
|
|
|
|
}
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!tempQueue->empty()) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
resultList.append(tempQueue->dequeue());
|
|
|
|
|
}
|
|
|
|
|
delete tempQueue;
|
|
|
|
|
tempQueue = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//BFS+Heap+超过3个多音字只建一个索引,比较折中的方案
|
2021-12-17 17:39:37 +08:00
|
|
|
|
void stitchMultiToneWordsBFSHeapLess3(const QString &hanzi, QStringList &resultList) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
|
|
|
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
int multiToneWordNum = 0;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(auto i : hanzi) {
|
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(i)) {
|
|
|
|
|
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
++multiToneWordNum;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(multiToneWordNum > 3) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
QString oneResult, oneResultFirst;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(auto i : hanzi) {
|
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(i)) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
oneResult += FileUtils::map_chinese2pinyin[i].first();
|
|
|
|
|
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
oneResult += i;
|
|
|
|
|
oneResultFirst += i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
resultList.append(oneResult);
|
|
|
|
|
resultList.append(oneResultFirst);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(i);
|
|
|
|
|
tempQueueFirst->enqueue(i.at(0));
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst->enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(tempHanzi.size() != 0) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueueSize = tempQueue->size();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
|
|
|
|
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempQueue->head() + i);
|
|
|
|
|
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
tempQueueFirst->dequeue();
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
tempQueueFirst->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!tempQueue->empty()) {
|
2020-12-30 15:56:23 +08:00
|
|
|
|
resultList.append(tempQueue->dequeue());
|
|
|
|
|
resultList.append(tempQueueFirst->dequeue());
|
|
|
|
|
}
|
|
|
|
|
delete tempQueue;
|
|
|
|
|
delete tempQueueFirst;
|
|
|
|
|
tempQueue = nullptr;
|
|
|
|
|
tempQueueFirst = nullptr;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-03 16:58:26 +08:00
|
|
|
|
//BFS+Stack+超过3个多音字只建一个索引,比较折中的方案
|
2021-12-17 17:39:37 +08:00
|
|
|
|
void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &resultList) {
|
2022-03-02 09:27:40 +08:00
|
|
|
|
QString tempHanzi;
|
2021-01-03 16:58:26 +08:00
|
|
|
|
QQueue<QString> tempQueue;
|
|
|
|
|
QQueue<QString> tempQueueFirst;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
int multiToneWordNum = 0;
|
2022-03-02 09:27:40 +08:00
|
|
|
|
|
|
|
|
|
for (auto i:hanzi) {
|
2022-05-17 15:33:19 +08:00
|
|
|
|
if (HanZiToPinYin::getInstance()->isMultiTone(QString(i).toStdString()))
|
2022-03-02 09:27:40 +08:00
|
|
|
|
++multiToneWordNum;
|
2021-01-03 16:58:26 +08:00
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(multiToneWordNum > 3) {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
QString oneResult, oneResultFirst;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(auto i : hanzi) {
|
2022-03-02 09:27:40 +08:00
|
|
|
|
QStringList results;
|
2022-05-17 15:33:19 +08:00
|
|
|
|
HanZiToPinYin::getInstance()->getResults(QString(i).toStdString(), results);
|
2022-03-02 09:27:40 +08:00
|
|
|
|
if(results.size()) {
|
|
|
|
|
oneResult += results.first();
|
|
|
|
|
oneResultFirst += results.first().at(0);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
oneResult += i;
|
|
|
|
|
oneResultFirst += i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
resultList.append(oneResult);
|
|
|
|
|
resultList.append(oneResultFirst);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-02 09:27:40 +08:00
|
|
|
|
QStringList results;
|
2022-05-17 15:33:19 +08:00
|
|
|
|
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
|
2022-03-02 09:27:40 +08:00
|
|
|
|
if(results.size()) {
|
|
|
|
|
for(auto i : results) {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
tempQueue.enqueue(i);
|
|
|
|
|
tempQueueFirst.enqueue(i.at(0));
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
tempQueue.enqueue(tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst.enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(tempHanzi.size() != 0) {
|
2022-05-17 15:33:19 +08:00
|
|
|
|
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
|
2021-01-03 16:58:26 +08:00
|
|
|
|
tempQueueSize = tempQueue.size();
|
2022-03-02 09:27:40 +08:00
|
|
|
|
if(results.size()) {
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
2022-03-02 09:27:40 +08:00
|
|
|
|
for(auto i : results) {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
tempQueue.enqueue(tempQueue.head() + i);
|
|
|
|
|
tempQueueFirst.enqueue(tempQueueFirst.head() + i.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
tempQueueFirst.dequeue();
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
} else {
|
|
|
|
|
for(int j = 0; j < tempQueueSize; ++j) {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst.enqueue(tempQueueFirst.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
tempQueueFirst.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!tempQueue.empty()) {
|
2021-01-03 16:58:26 +08:00
|
|
|
|
resultList.append(tempQueue.dequeue());
|
|
|
|
|
resultList.append(tempQueueFirst.dequeue());
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-17 17:39:37 +08:00
|
|
|
|
/**
 * @brief FileUtils::findMultiToneWords Collects the pinyin combinations of a text.
 * @param hanzi text to convert
 * @return the list filled by stitchMultiToneWordsBFSStackLess3()
 */
QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
    QStringList combinations;
    stitchMultiToneWordsBFSStackLess3(hanzi, combinations);
    return combinations;
}
|
|
|
|
|
|
2020-12-29 20:18:36 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getDocxTextContent
|
|
|
|
|
* @param path: abs path
|
|
|
|
|
* @return docx to QString
|
|
|
|
|
*/
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getDocxTextContent(const QString &path, QString &textcontent) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
//fix me :optimized by xpath??
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QFileInfo info = QFileInfo(path);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!info.exists() || info.isDir())
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-31 21:14:13 +08:00
|
|
|
|
QuaZip file(path);
|
|
|
|
|
if(!file.open(QuaZip::mdUnzip))
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-29 20:18:36 +08:00
|
|
|
|
|
2021-06-25 16:30:46 +08:00
|
|
|
|
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
|
|
|
|
|
file.close();
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2021-06-25 16:30:46 +08:00
|
|
|
|
}
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
|
|
|
|
|
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
|
|
|
|
|
2021-05-28 15:55:26 +08:00
|
|
|
|
QXmlStreamReader reader(&fileR);
|
|
|
|
|
|
|
|
|
|
while (!reader.atEnd()){
|
|
|
|
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
|
|
|
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fileR.close();
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* //原加载DOM文档方式;
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QDomDocument doc;
|
|
|
|
|
doc.setContent(fileR.readAll());
|
2021-04-08 16:11:58 +08:00
|
|
|
|
fileR.close();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QDomElement first = doc.firstChildElement("w:document");
|
2021-01-19 20:59:46 +08:00
|
|
|
|
QDomElement body = first.firstChildElement("w:body");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!body.isNull()) {
|
|
|
|
|
QDomElement wp = body.firstChildElement("w:p");
|
|
|
|
|
while(!wp.isNull()) {
|
|
|
|
|
QDomElement wr = wp.firstChildElement("w:r");
|
|
|
|
|
while(!wr.isNull()) {
|
2021-01-19 20:59:46 +08:00
|
|
|
|
QDomElement wt = wr.firstChildElement("w:t");
|
2021-06-10 20:43:57 +08:00
|
|
|
|
textcontent.append(wt.text().replace("\n", "")).replace("\r", " ");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
|
2021-03-16 17:21:10 +08:00
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
wr = wr.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
wp = wp.nextSiblingElement();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
body = body.nextSiblingElement();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
file.close();
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2021-05-28 15:55:26 +08:00
|
|
|
|
*/
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getPptxTextContent(const QString &path, QString &textcontent) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
QFileInfo info = QFileInfo(path);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!info.exists() || info.isDir())
|
2021-04-08 16:11:58 +08:00
|
|
|
|
return;
|
|
|
|
|
QuaZip file(path);
|
|
|
|
|
if(!file.open(QuaZip::mdUnzip))
|
|
|
|
|
return;
|
|
|
|
|
QString prefix("ppt/slides/slide");
|
|
|
|
|
QStringList fileList;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(QString i : file.getFileNameList()) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
if(i.startsWith(prefix))
|
2021-04-26 15:06:47 +08:00
|
|
|
|
fileList << i;
|
2021-04-08 16:11:58 +08:00
|
|
|
|
}
|
2021-06-25 16:30:46 +08:00
|
|
|
|
if(fileList.isEmpty()) {
|
|
|
|
|
file.close();
|
2021-04-08 16:11:58 +08:00
|
|
|
|
return;
|
2021-06-25 16:30:46 +08:00
|
|
|
|
}
|
2021-05-28 15:55:26 +08:00
|
|
|
|
|
|
|
|
|
for(int i = 0; i < fileList.size(); ++i){
|
|
|
|
|
QString name = prefix + QString::number(i + 1) + ".xml";
|
|
|
|
|
if(!file.setCurrentFile(name)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
fileR.open(QIODevice::ReadOnly);
|
|
|
|
|
|
|
|
|
|
QXmlStreamReader reader(&fileR);
|
|
|
|
|
|
|
|
|
|
while (!reader.atEnd()){
|
|
|
|
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
|
|
|
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fileR.close();
|
|
|
|
|
}
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/*
|
2021-04-08 16:11:58 +08:00
|
|
|
|
QDomElement sptree;
|
|
|
|
|
QDomElement sp;
|
|
|
|
|
QDomElement txbody;
|
|
|
|
|
QDomElement ap;
|
|
|
|
|
QDomElement ar;
|
2021-04-13 13:57:02 +08:00
|
|
|
|
QDomDocument doc;
|
2021-04-08 16:11:58 +08:00
|
|
|
|
QDomElement at;
|
2021-04-13 13:57:02 +08:00
|
|
|
|
// QDomNodeList atList;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(int i = 0; i < fileList.size(); ++i) {
|
|
|
|
|
QString name = prefix + QString::number(i + 1) + ".xml";
|
|
|
|
|
if(!file.setCurrentFile(name)) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
fileR.open(QIODevice::ReadOnly);
|
2021-04-13 13:57:02 +08:00
|
|
|
|
doc.clear();
|
2021-04-08 16:11:58 +08:00
|
|
|
|
doc.setContent(fileR.readAll());
|
|
|
|
|
fileR.close();
|
2021-04-13 13:57:02 +08:00
|
|
|
|
|
|
|
|
|
//fix me :optimized by xpath??
|
|
|
|
|
//This method looks better but slower,
|
|
|
|
|
//If xml file is very large with many useless node,this method will take a lot of time.
|
|
|
|
|
|
|
|
|
|
// atList = doc.elementsByTagName("a:t");
|
|
|
|
|
// for(int i = 0; i<atList.size(); ++i)
|
|
|
|
|
// {
|
|
|
|
|
// at = atList.at(i).toElement();
|
|
|
|
|
// if(!at.isNull())
|
|
|
|
|
// {
|
|
|
|
|
// textcontent.append(at.text().replace("\r","")).replace("\t"," ");
|
|
|
|
|
// if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
|
|
|
|
// {
|
|
|
|
|
// file.close();
|
|
|
|
|
// return;
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
//This is ugly but seems more efficient when handel a large file.
|
2021-04-08 16:11:58 +08:00
|
|
|
|
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!sptree.isNull()) {
|
|
|
|
|
sp = sptree.firstChildElement("p:sp");
|
|
|
|
|
while(!sp.isNull()) {
|
|
|
|
|
txbody = sp.firstChildElement("p:txBody");
|
|
|
|
|
while(!txbody.isNull()) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
ap = txbody.firstChildElement("a:p");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!ap.isNull()) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
ar = ap.firstChildElement("a:r");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!ar.isNull()) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
at = ar.firstChildElement("a:t");
|
2021-04-26 15:06:47 +08:00
|
|
|
|
textcontent.append(at.text().replace("\r", "")).replace("\t", "");
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
|
2021-04-08 16:11:58 +08:00
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
ar = ar.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
ap = ap.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
txbody = txbody.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
sp = sp.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
sptree = sptree.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
2021-05-28 15:55:26 +08:00
|
|
|
|
*/
|
2021-04-08 16:11:58 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent) {
|
2021-04-13 14:53:55 +08:00
|
|
|
|
QFileInfo info = QFileInfo(path);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!info.exists() || info.isDir())
|
2021-04-13 14:53:55 +08:00
|
|
|
|
return;
|
|
|
|
|
QuaZip file(path);
|
|
|
|
|
if(!file.open(QuaZip::mdUnzip))
|
|
|
|
|
return;
|
|
|
|
|
|
2021-06-25 16:30:46 +08:00
|
|
|
|
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
|
|
|
|
|
file.close();
|
2021-04-13 14:53:55 +08:00
|
|
|
|
return;
|
2021-06-25 16:30:46 +08:00
|
|
|
|
}
|
2021-04-13 14:53:55 +08:00
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
|
2021-05-28 15:55:26 +08:00
|
|
|
|
fileR.open(QIODevice::ReadOnly);
|
2021-04-13 14:53:55 +08:00
|
|
|
|
|
2021-05-28 15:55:26 +08:00
|
|
|
|
QXmlStreamReader reader(&fileR);
|
|
|
|
|
|
|
|
|
|
while (!reader.atEnd()){
|
|
|
|
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
|
|
|
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fileR.close();
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/*
|
2021-04-13 14:53:55 +08:00
|
|
|
|
QDomDocument doc;
|
|
|
|
|
doc.setContent(fileR.readAll());
|
|
|
|
|
fileR.close();
|
|
|
|
|
QDomElement sst = doc.firstChildElement("sst");
|
|
|
|
|
QDomElement si;
|
|
|
|
|
QDomElement r;
|
|
|
|
|
QDomElement t;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
while(!sst.isNull()) {
|
|
|
|
|
si = sst.firstChildElement("si");
|
|
|
|
|
while(!si.isNull()) {
|
|
|
|
|
r = si.firstChildElement("r");
|
|
|
|
|
if(r.isNull()) {
|
|
|
|
|
t = si.firstChildElement("t");
|
|
|
|
|
} else {
|
2021-04-13 14:53:55 +08:00
|
|
|
|
t = r.firstChildElement("t");
|
|
|
|
|
}
|
|
|
|
|
if(t.isNull())
|
|
|
|
|
continue;
|
2021-04-26 15:06:47 +08:00
|
|
|
|
textcontent.append(t.text().replace("\r", "").replace("\n", ""));
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
|
2021-04-13 14:53:55 +08:00
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
si = si.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
sst = sst.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
2021-05-28 15:55:26 +08:00
|
|
|
|
*/
|
2021-04-13 14:53:55 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getPdfTextContent(const QString &path, QString &textcontent) {
|
2021-04-15 09:19:36 +08:00
|
|
|
|
Poppler::Document *doc = Poppler::Document::load(path);
|
2021-06-25 16:30:46 +08:00
|
|
|
|
if(doc->isLocked()) {
|
|
|
|
|
delete doc;
|
2021-04-15 09:19:36 +08:00
|
|
|
|
return;
|
2021-06-25 16:30:46 +08:00
|
|
|
|
}
|
2021-04-15 09:19:36 +08:00
|
|
|
|
const QRectF qf;
|
|
|
|
|
int pageNum = doc->numPages();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
for(int i = 0; i < pageNum; ++i) {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
Poppler::Page *page = doc->page(i);
|
|
|
|
|
if(page) {
|
|
|
|
|
textcontent.append(page->text(qf).replace("\n", "").replace("\r", " "));
|
|
|
|
|
delete page;
|
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
|
|
|
|
|
break;
|
|
|
|
|
}
|
2021-04-15 09:19:36 +08:00
|
|
|
|
}
|
|
|
|
|
delete doc;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getTxtContent(const QString &path, QString &textcontent) {
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QFile file(path);
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2021-01-14 20:56:14 +08:00
|
|
|
|
|
2021-04-08 16:11:58 +08:00
|
|
|
|
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
|
2021-01-14 20:56:14 +08:00
|
|
|
|
|
|
|
|
|
uchardet_t chardet = uchardet_new();
|
2021-04-26 15:06:47 +08:00
|
|
|
|
if(uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0)
|
|
|
|
|
qWarning() << "Txt file encoding format detect fail!" << path;
|
2021-01-14 20:56:14 +08:00
|
|
|
|
|
|
|
|
|
uchardet_data_end(chardet);
|
|
|
|
|
const char *codec = uchardet_get_charset(chardet);
|
|
|
|
|
|
|
|
|
|
if(QTextCodec::codecForName(codec) == 0)
|
2021-04-26 15:06:47 +08:00
|
|
|
|
qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(codec);
|
2021-01-14 20:56:14 +08:00
|
|
|
|
|
2021-04-26 15:06:47 +08:00
|
|
|
|
QTextStream stream(encodedString, QIODevice::ReadOnly);
|
2021-01-14 20:56:14 +08:00
|
|
|
|
stream.setCodec(codec);
|
2021-02-07 10:11:30 +08:00
|
|
|
|
uchardet_delete(chardet);
|
2021-01-14 20:56:14 +08:00
|
|
|
|
|
2021-05-28 15:55:26 +08:00
|
|
|
|
textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
|
2021-01-19 19:26:39 +08:00
|
|
|
|
|
|
|
|
|
file.close();
|
|
|
|
|
encodedString.clear();
|
|
|
|
|
chardet = NULL;
|
|
|
|
|
stream.flush();
|
|
|
|
|
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-29 16:41:30 +08:00
|
|
|
|
}
|
2021-05-27 21:10:11 +08:00
|
|
|
|
|
2021-09-23 10:31:13 +08:00
|
|
|
|
/**
 * @brief Opens a file with its default application, or reveals it in the file manager.
 *
 * With @p openInDir set, the org.freedesktop.FileManager1 "ShowItems" D-Bus method
 * is called for @p path. Otherwise the default application for the file's content
 * type is looked up (first in ~/.config/mimeapps.list, then through GIO); when one
 * exists the file is launched through the com.kylin.AppManager D-Bus service,
 * falling back to QDesktopServices::openUrl() if that call does not succeed.
 *
 * @param path      Local path of the file.
 * @param openInDir true to show the file in its folder instead of opening it.
 * @return 0 on success; -1 when the ShowItems call does not return a normal reply
 *         or when no default application is found for the file.
 */
int FileUtils::openFile(QString &path, bool openInDir)
{
    if (openInDir) {
        QStringList list;
        list.append(path);
        QDBusMessage message = QDBusMessage::createMethodCall("org.freedesktop.FileManager1",
                                                              "/org/freedesktop/FileManager1",
                                                              "org.freedesktop.FileManager1",
                                                              "ShowItems");
        message.setArguments({list, "ukui-search"});
        const QDBusMessage reply = QDBusConnection::sessionBus().call(message);
        // Inspect the type of the reply actually received; comparing the enum
        // constant with itself would always report success.
        if (reply.type() == QDBusMessage::ReplyMessage) {
            return 0;
        }
        qDebug() << "Error! QDBusMessage reply error! ReplyMessage:" << reply.errorMessage();
        return -1;
    }

    auto file = wrapGFile(g_file_new_for_uri(QUrl::fromLocalFile(path).toString().toUtf8().constData()));
    auto fileInfo = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                    "standard::*," "time::*," "access::*," "mountable::*," "metadata::*," "trash::*," G_FILE_ATTRIBUTE_ID_FILE,
                                                    G_FILE_QUERY_INFO_NONE,
                                                    nullptr,
                                                    nullptr));

    // g_file_query_info() returns NULL on failure; only query it when valid.
    QString mimeType;
    auto rawInfo = fileInfo.get()->get();
    if (rawInfo) {
        mimeType = g_file_info_get_content_type(rawInfo);
        if (mimeType.isEmpty() && g_file_info_has_attribute(rawInfo, "standard::fast-content-type")) {
            mimeType = g_file_info_get_attribute_string(rawInfo, "standard::fast-content-type");
        }
    }
    const QByteArray mimeUtf8 = mimeType.toUtf8();

    GError *error = nullptr;
    GAppInfo *info = nullptr;
    /*
     * g_app_info_get_default_for_type function get wrong default app, so we get the
     * default app info from mimeapps.list, and chose the right default app for mimeType file
     */
    const QString mimeAppsListPath = QStandardPaths::writableLocation(QStandardPaths::HomeLocation)
                                     + "/.config/mimeapps.list";
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, mimeAppsListPath.toUtf8().constData(), G_KEY_FILE_NONE, &error)) {
        if (error) {
            qWarning()<<"load mimeapps list error msg"<<error->message;
        }
        g_clear_error(&error);
        info = g_app_info_get_default_for_type(mimeUtf8.constData(), false);
    } else {
        gchar *desktopApp = g_key_file_get_string(keyfile, "Default Applications", mimeUtf8.constData(), &error);
        // A missing key sets the error; release it so it is not leaked.
        g_clear_error(&error);
        if (desktopApp != nullptr) {
            info = reinterpret_cast<GAppInfo *>(g_desktop_app_info_new(desktopApp));
            g_free(desktopApp);
        } else {
            info = g_app_info_get_default_for_type(mimeUtf8.constData(), false);
        }
    }
    g_key_file_free(keyfile);

    int res = -1;
    if (G_IS_APP_INFO(info)) {
        bool isSuccess = false;
        {
            // Scoped so the interface is destroyed before the fallback below runs.
            QDBusInterface appLaunchInterface("com.kylin.AppManager",
                                              "/com/kylin/AppManager",
                                              "com.kylin.AppManager",
                                              QDBusConnection::sessionBus());
            if (!appLaunchInterface.isValid()) {
                qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
            } else {
                appLaunchInterface.setTimeout(10000);
                QDBusReply<bool> reply = appLaunchInterface.call("LaunchDefaultAppWithUrl", QUrl::fromLocalFile(path).toString());
                if (reply.isValid()) {
                    isSuccess = reply.value();
                } else {
                    qWarning() << "SoftWareCenter dbus called failed!";
                }
            }
        }
        if (!isSuccess) {
            QDesktopServices::openUrl(QUrl::fromLocalFile(path));
        }
        res = 0;
    }

    // Only unref a real object; g_object_unref(NULL) raises a GLib critical.
    if (info != nullptr) {
        g_object_unref(info);
    }
    return res;
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Puts @p path on the application clipboard as plain text.
 * @param path Text to copy.
 * @return Always true.
 */
bool FileUtils::copyPath(QString &path)
{
    QClipboard *clipboard = QApplication::clipboard();
    clipboard->setText(path);
    return true;
}
|
2021-07-31 16:12:04 +08:00
|
|
|
|
|
|
|
|
|
/**
 * @brief Escapes the characters that are significant in HTML markup.
 *
 * Replaces '&', '<' and '>' with the entities "&amp;", "&lt;" and "&gt;" so the
 * text can be embedded in rich text without being interpreted as markup.
 *
 * @param str Plain text to escape.
 * @return A copy of @p str with the three characters replaced by entities.
 */
QString FileUtils::escapeHtml(const QString &str)
{
    QString escaped = str;
    // '&' goes first, otherwise the ampersands of the entities added below
    // would be escaped a second time.
    escaped.replace("&", "&amp;");
    escaped.replace("<", "&lt;");
    escaped.replace(">", "&gt;");
    return escaped;
}
|
2021-08-06 17:45:28 +08:00
|
|
|
|
|
|
|
|
|
/**
 * @brief Cuts a window out of @p myStr and elides it to at most two label lines.
 *
 * @p start and @p length are offsets into the std::string (i.e. counted in chars
 * of @p myStr, not in QString characters). The result is elided with the
 * application font to a width of 2 * LABEL_MAX_WIDTH.
 *
 * - When @p length covers the whole string, the whole string is elided.
 * - When the window fits inside the string, the window is returned with a
 *   trailing ellipsis.
 * - When the window runs past the end, it is shifted back so that it ends at the
 *   end of the string.
 *
 * @param myStr  Source string.
 * @param start  Offset of the first char of the window.
 * @param length Number of chars in the window.
 * @return The elided snippet, or " " when @p start or @p length is negative.
 */
QString FileUtils::chineseSubString(const std::string &myStr, int start, int length)
{
    // Out-of-range guard.
    if (start < 0 || length < 0) {
        return " ";
    }

    const std::size_t total = myStr.length();
    const std::size_t from = static_cast<std::size_t>(start);
    const std::size_t count = static_cast<std::size_t>(length);

    // std::string::substr() throws std::out_of_range when pos > size();
    // treat such a window as empty instead of letting the exception escape.
    auto slice = [&myStr, total](std::size_t pos, std::size_t n) {
        return pos > total ? std::string() : myStr.substr(pos, n);
    };

    QString sub = QString::fromStdString(myStr);
    QFont ft(QApplication::font().family(), QApplication::font().pointSize());
    QFontMetrics fm(ft);
    const auto maxWidth = 2 * LABEL_MAX_WIDTH;

    if (count >= total) {
        // The requested length covers the whole string: elide the full text.
        const std::string requested = slice(from, count);
        const Qt::TextElideMode mode =
            fm.width(QString::fromStdString(requested)) >= maxWidth ? Qt::ElideRight : Qt::ElideLeft;
        return fm.elidedText(sub, mode, maxWidth); // elide beyond two lines
    }

    if (from + count <= total) {
        // The window lies entirely inside the string.
        sub = QString::fromStdString(myStr.substr(from, count));
        if (from + count < total) {
            sub.replace(sub.length() - 3, 3, "…"); // replace the last three characters
        } else {
            sub.append("…"); // window ends exactly at the end: just append
        }
        return fm.elidedText(sub, Qt::ElideRight, maxWidth); // elide beyond two lines
    }

    // The window runs past the end: move its start back so it ends at the end.
    const std::size_t newStart = total - count;
    sub = QString::fromStdString(myStr.substr(newStart, count));
    if (fm.width(QString::fromStdString(myStr.substr(newStart, from))) >= maxWidth) {
        return fm.elidedText(sub, Qt::ElideLeft, maxWidth);
    }
    if (newStart + 3 < from) {
        sub.replace(0, 3, "…").append("…");
    } else {
        sub = "…" + QString::fromStdString(slice(from, count));
        sub.append("…");
    }
    return fm.elidedText(sub, Qt::ElideRight, maxWidth);
}
|
2021-08-26 11:22:10 +08:00
|
|
|
|
|
2021-09-08 15:31:47 +08:00
|
|
|
|
/**
 * @brief Looks up a themed icon while holding iconMutex.
 * @param name        Icon name passed to QIcon::fromTheme().
 * @param iconDefault Fallback icon passed to QIcon::fromTheme().
 * @return The icon returned by QIcon::fromTheme().
 */
QIcon FileUtils::iconFromTheme(const QString &name, const QIcon &iconDefault)
{
    // The lock is held for the whole lookup and released when guard goes out of scope.
    QMutexLocker guard(&iconMutex);
    QIcon themed = QIcon::fromTheme(name, iconDefault);
    return themed;
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
bool FileUtils::isOpenXMLFileEncrypted(const QString &path)
|
2021-10-26 14:20:58 +08:00
|
|
|
|
{
|
|
|
|
|
QFile file(path);
|
|
|
|
|
file.open(QIODevice::ReadOnly|QIODevice::Text);
|
|
|
|
|
QByteArray encrypt = file.read(4);
|
|
|
|
|
file.close();
|
|
|
|
|
if (encrypt.length() < 4) {
|
|
|
|
|
qDebug() << "Reading file error!" << path;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
//比较前四位是否为对应值来判断OpenXML类型文件是否加密
|
|
|
|
|
if (encrypt[0] == 0x50 && encrypt[1] == 0x4b && encrypt[2] == 0x03 && encrypt[3] == 0x04) {
|
|
|
|
|
return false;
|
|
|
|
|
} else {
|
|
|
|
|
qDebug() << "Encrypt!" << path;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-10-27 15:16:43 +08:00
|
|
|
|
//todo: only support docx, pptx, xlsx
|
2022-10-26 18:01:40 +08:00
|
|
|
|
bool FileUtils::isEncrypedOrUnsupport(const QString& path, const QString& suffix)
|
2021-10-27 15:16:43 +08:00
|
|
|
|
{
|
|
|
|
|
QMimeType type = FileUtils::getMimetype(path);
|
|
|
|
|
QString name = type.name();
|
2022-10-26 18:01:40 +08:00
|
|
|
|
|
2021-10-27 15:16:43 +08:00
|
|
|
|
if(name == "application/zip") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") {
|
2021-10-27 15:16:43 +08:00
|
|
|
|
|
|
|
|
|
return FileUtils::isOpenXMLFileEncrypted(path);
|
2022-10-26 18:01:40 +08:00
|
|
|
|
} else if (suffix == "uot" || suffix == "uos" || suffix == "uop") {
|
2022-05-27 16:07:09 +08:00
|
|
|
|
return false;
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
} else if (suffix == "ofd") {
|
2022-05-27 16:07:09 +08:00
|
|
|
|
return false;
|
|
|
|
|
|
2021-10-27 15:16:43 +08:00
|
|
|
|
} else {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
} else if(name == "text/plain") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if(suffix.endsWith("txt"))
|
2021-10-27 15:16:43 +08:00
|
|
|
|
return false;
|
|
|
|
|
return true;
|
2022-04-26 10:25:23 +08:00
|
|
|
|
} else if(name == "text/html") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if(suffix.endsWith("html"))
|
2022-04-26 10:25:23 +08:00
|
|
|
|
return false;
|
|
|
|
|
return true;
|
2021-10-27 15:16:43 +08:00
|
|
|
|
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if(suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
|
|
|
|
|
suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") {
|
2021-10-27 15:16:43 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
} else if(name == "application/pdf") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if(suffix == "pdf")
|
2021-10-27 15:16:43 +08:00
|
|
|
|
return false;
|
|
|
|
|
return true;
|
2022-05-27 16:07:09 +08:00
|
|
|
|
|
|
|
|
|
} else if(name == "application/xml" || name == "application/uof") {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
if(suffix == "uof") {
|
2022-05-27 16:07:09 +08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
} else if (true == targetPhotographTypeMap[suffix]) {
|
|
|
|
|
return !isOcrSupportSize(path);
|
2021-10-27 15:16:43 +08:00
|
|
|
|
} else {
|
2022-10-26 18:01:40 +08:00
|
|
|
|
// qInfo() << "Unsupport format:[" << path << "][" << type.name() << "]";
|
2021-10-27 15:16:43 +08:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-10-26 14:20:58 +08:00
|
|
|
|
|
2022-04-13 13:46:30 +08:00
|
|
|
|
/**
 * @brief Checks whether an image is large enough for OCR.
 * @param path Path of the image file, loaded through QImage.
 * @return true when both height and width are at least OCR_MIN_SIZE pixels,
 *         false otherwise.
 */
bool FileUtils::isOcrSupportSize(QString path)
{
    QImage image(path);
    // Restrict the pixel dimensions of the picture.
    const bool tallEnough = !(image.height() < OCR_MIN_SIZE);
    const bool wideEnough = !(image.width() < OCR_MIN_SIZE);
    return tallEnough && wideEnough;
}
|
|
|
|
|
|
2021-08-26 11:22:10 +08:00
|
|
|
|
/**
 * @brief Builds an HTML snippet of @p text with keyword characters emphasised.
 *
 * Every character of @p text that occurs in @p keyword (compared case-insensitively,
 * character by character) is wrapped, run by run, in <b><font size="4">…</font></b>.
 * All characters are passed through escapeHtml(), line feeds become <br />, and
 * the result is wrapped in <pre>…</pre>.
 *
 * @param text    Text to render.
 * @param keyword Characters to emphasise.
 * @return The HTML snippet.
 */
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{
    // Loop-invariant: upper-case the keyword once instead of once per character.
    const QString upperKeyword = keyword.toUpper();

    QString htmlString;
    bool boldOpened = false;
    for (int i = 0; i < text.length(); ++i) {
        const QString ch(text.at(i));
        const bool highlighted = upperKeyword.contains(ch.toUpper());
        if (highlighted != boldOpened) {
            // Entering or leaving a highlighted run.
            htmlString.append(highlighted ? QString("<b><font size=\"4\">") : QString("</font></b>"));
            boldOpened = highlighted;
        }
        htmlString.append(FileUtils::escapeHtml(ch));
    }
    // Close a run that extends to the end of the text so the tags stay balanced.
    if (boldOpened) {
        htmlString.append(QString("</font></b>"));
    }

    htmlString.replace("\n", "<br />"); // replace line breaks
    return "<pre>" + htmlString + "</pre>";
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Wraps @p name in the heading markup used for titles.
 *
 * The text is escaped with escapeHtml() and placed inside
 * <h3 style="font-weight:normal;"><pre>…</pre></h3>.
 *
 * @param name Text to wrap.
 * @return The resulting HTML snippet.
 */
QString FileUtils::setAllTextBold(const QString &name)
{
    const QString escaped = escapeHtml(name);
    const QString markup("<h3 style=\"font-weight:normal;\"><pre>%1</pre></h3>");
    return markup.arg(escaped);
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Inserts up to two line breaks so that @p text fits a label of LABEL_MAX_WIDTH.
 *
 * The text is measured with the font metrics of @p p_label. When it is wider than
 * LABEL_MAX_WIDTH, a '\n' is inserted each time the current line reaches or
 * exceeds that width; at most two breaks are inserted. The label itself is not
 * modified.
 *
 * @param p_label Label whose font metrics are used for measuring.
 * @param text    Text to wrap.
 * @return The text with the inserted line breaks, or an unchanged copy when it
 *         already fits.
 */
QString FileUtils::wrapData(QLabel *p_label, const QString &text)
{
    QString wrapped = text;
    QFontMetrics metrics = p_label->fontMetrics();

    if (!(metrics.width(wrapped) > LABEL_MAX_WIDTH)) {
        return wrapped;
    }

    int lineStart = 0;
    int breaksInserted = 0;
    for (int pos = lineStart; pos < wrapped.length(); pos++) {
        const int lineWidth = metrics.width(wrapped.mid(lineStart, pos - lineStart));
        if (lineWidth < LABEL_MAX_WIDTH) {
            continue;
        }

        // Exact fit: break right here. Overshoot: break one character earlier.
        const int breakAt = (lineWidth == LABEL_MAX_WIDTH) ? pos : pos - 1;
        lineStart = pos;
        wrapped.insert(breakAt, '\n');
        breaksInserted++;

        if (breaksInserted == 2) {
            break;
        }
    }
    return wrapped;
}
|
2022-05-27 16:07:09 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* uof1.0解析
|
|
|
|
|
* 参考规范:GB/T 20916-2007
|
|
|
|
|
* 1.文字处理
|
|
|
|
|
* 2.电子表格
|
|
|
|
|
* 3.演示文稿
|
|
|
|
|
* ppt的内容存放在对象集中,
|
|
|
|
|
* 可以通过演示文稿-主体-幻灯片集-幻灯片下的锚点属性获取引用了哪些内容:
|
|
|
|
|
* <uof:锚点 uof:图形引用="OBJ16"/>
|
|
|
|
|
* 目标:文本串
|
|
|
|
|
*/
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
|
2022-05-27 16:07:09 +08:00
|
|
|
|
{
|
|
|
|
|
QFileInfo info(path);
|
|
|
|
|
if (!info.exists() || info.isDir()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QFile file(path);
|
|
|
|
|
if (!file.open(QIODevice::ReadOnly)) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QDomDocument doc;
|
|
|
|
|
if (!doc.setContent(&file)) {
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
file.close();
|
|
|
|
|
|
2022-05-11 09:08:23 +08:00
|
|
|
|
bool isPPT = false;
|
2022-05-27 16:07:09 +08:00
|
|
|
|
QDomElement rootElem = doc.documentElement();
|
|
|
|
|
QDomNode node = rootElem.firstChild();
|
|
|
|
|
while (!node.isNull()) {
|
|
|
|
|
QDomElement e = node.toElement();
|
|
|
|
|
if (!e.isNull() && e.tagName() == "uof:演示文稿") {
|
2022-05-11 09:08:23 +08:00
|
|
|
|
isPPT = true;
|
2022-05-27 16:07:09 +08:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
node = node.nextSibling();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//单独处理pdf文档
|
2022-05-11 09:08:23 +08:00
|
|
|
|
if (isPPT) {
|
|
|
|
|
qDebug() << path << "is PPT";
|
2022-05-09 14:47:40 +08:00
|
|
|
|
processUOFPPT(doc, textContent);
|
2022-05-27 16:07:09 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
file.open(QIODevice::ReadOnly);
|
|
|
|
|
QXmlStreamReader reader(&file);
|
|
|
|
|
while (!reader.atEnd()) {
|
|
|
|
|
//适用于文字处理与电子表格
|
|
|
|
|
if (reader.readNextStartElement() && reader.name().toString() == "文本串") {
|
|
|
|
|
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
file.close();
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-09 14:47:40 +08:00
|
|
|
|
void FileUtils::processUOFPPT(const QDomDocument &doc, QString &content)
|
|
|
|
|
{
|
|
|
|
|
QDomElement rootElem = doc.documentElement();
|
|
|
|
|
QList<QDomElement> nodes;
|
|
|
|
|
QQueue<QString> names; //每个节点的名称
|
|
|
|
|
names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
|
|
|
|
|
|
|
|
|
|
findNodes(rootElem, names, nodes);
|
|
|
|
|
|
|
|
|
|
if (nodes.empty()) {
|
|
|
|
|
//TODO 在uof-ppt不存在锚点节点时,直接查找文本节点?
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QStringList objs;
|
|
|
|
|
//每一个 演:幻灯片 -> 锚点
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "uof:锚点";
|
|
|
|
|
findNodeAttr(node, names, "uof:图形引用", objs);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nodes.clear();
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "uof:对象集" << "图:图形";
|
|
|
|
|
findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
|
|
|
|
|
|
|
|
|
|
if (nodes.empty()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-11 09:08:23 +08:00
|
|
|
|
QList<QDomElement> paraNodes; //全部段落节点
|
2022-05-09 14:47:40 +08:00
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
2022-05-11 09:08:23 +08:00
|
|
|
|
names << "图:文本内容" << "字:段落";
|
|
|
|
|
findNodes(node, names, paraNodes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nodes.clear();
|
|
|
|
|
for (const auto &node : paraNodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "字:句";
|
|
|
|
|
findNodes(node, names, nodes); //全部段落下的全部句节点
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "字:文本串";
|
2022-05-09 14:47:40 +08:00
|
|
|
|
if (findNodeText(node, names, content)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief 查找elem的子节点
|
|
|
|
|
* @param elem 起始节点
|
|
|
|
|
* @param names 名称链
|
|
|
|
|
* @param nodes 查找到的全部结果
|
|
|
|
|
*/
|
|
|
|
|
void FileUtils::findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
|
|
|
|
|
{
|
|
|
|
|
QString targetName = names.dequeue();
|
|
|
|
|
QDomNode node = elem.firstChild();
|
|
|
|
|
while (!node.isNull()) {
|
|
|
|
|
QDomElement e = node.toElement();
|
|
|
|
|
if (!e.isNull() && e.tagName() == targetName) {
|
|
|
|
|
if (names.empty()) {
|
|
|
|
|
nodes.append(e);
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
findNodes(e, names, nodes);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
node = node.nextSibling();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline void FileUtils::findNodesByAttr(const QDomElement &elem, QQueue <QString> &names, QList <QDomElement> &nodes, const QString &attr, const QStringList &values)
|
|
|
|
|
{
|
|
|
|
|
findNodes(elem, names, nodes);
|
|
|
|
|
|
|
|
|
|
QList<QDomElement>::iterator it = nodes.begin();
|
|
|
|
|
while (it != nodes.end()) {
|
|
|
|
|
if ((*it).hasAttribute(attr) && values.contains((*it).attribute(attr))) {
|
|
|
|
|
it++;
|
|
|
|
|
} else {
|
|
|
|
|
it = nodes.erase(it);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline bool FileUtils::findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
|
|
|
|
|
{
|
|
|
|
|
QList<QDomElement> nodes;
|
|
|
|
|
findNodes(elem, names, nodes);
|
|
|
|
|
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
content.append(node.text());
|
|
|
|
|
if (content.length() >= MAX_CONTENT_LENGTH / 3) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline void FileUtils::findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
|
|
|
|
|
{
|
|
|
|
|
QList<QDomElement> nodes;
|
|
|
|
|
findNodes(elem, names, nodes);
|
|
|
|
|
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
if (node.hasAttribute(attr)) {
|
|
|
|
|
attrs.append(node.attribute(attr));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-27 16:07:09 +08:00
|
|
|
|
/**
|
|
|
|
|
* uof2.0解析
|
|
|
|
|
* @brief 参考规范文档 https://www.doc88.com/p-9089133923912.html 或 GJB/Z 165-2012
|
|
|
|
|
* ppt文档的内容存放在graphics.xml中,需要先解析content中的引用再解析graphics内容
|
|
|
|
|
* @param path
|
|
|
|
|
* @param textContent
|
|
|
|
|
*/
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getUOF2TextContent(const QString &path, QString &textContent)
|
2022-05-27 16:07:09 +08:00
|
|
|
|
{
|
|
|
|
|
QFileInfo info = QFileInfo(path);
|
|
|
|
|
if (!info.exists() || info.isDir())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
QuaZip file(path);
|
|
|
|
|
if (!file.open(QuaZip::mdUnzip))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (!file.setCurrentFile("content.xml")) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
if (!fileR.open(QIODevice::ReadOnly)) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QXmlStreamReader reader(&fileR);
|
|
|
|
|
|
|
|
|
|
while (!reader.atEnd()) {
|
|
|
|
|
if (reader.readNextStartElement() && reader.name().toString() == "文本串_415B") {
|
|
|
|
|
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fileR.close();
|
|
|
|
|
file.close();
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
|
2022-05-11 09:08:23 +08:00
|
|
|
|
{
|
|
|
|
|
QFileInfo info = QFileInfo(path);
|
|
|
|
|
if (!info.exists() || info.isDir())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
QuaZip zipFile(path);
|
|
|
|
|
QDomDocument doc;
|
|
|
|
|
if (!loadZipFileToDoc(zipFile, doc, "content.xml")) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QDomElement rootElem = doc.documentElement();
|
|
|
|
|
QList<QDomElement> nodes;
|
|
|
|
|
QQueue<QString> names; //每个节点的名称
|
|
|
|
|
names << "演:幻灯片集_6C0E" << "演:幻灯片_6C0F";
|
|
|
|
|
findNodes(rootElem, names, nodes);
|
|
|
|
|
|
|
|
|
|
if (nodes.empty()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QStringList attrs;
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "uof:锚点_C644";
|
|
|
|
|
findNodeAttr(node, names, "图形引用_C62E", attrs);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (attrs.empty()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!loadZipFileToDoc(zipFile, doc, "graphics.xml")) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nodes.clear();
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "图:图形_8062";
|
|
|
|
|
rootElem = doc.documentElement();
|
|
|
|
|
findNodesByAttr(rootElem, names, nodes, "标识符_804B", attrs);
|
|
|
|
|
|
|
|
|
|
QList<QDomElement> nodes416B; //字:段落_416B
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "图:文本_803C" << "图:内容_8043" << "字:段落_416B";
|
|
|
|
|
findNodes(node, names, nodes416B);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nodes.clear();
|
|
|
|
|
for (const auto &node : nodes416B) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "字:句_419D";
|
|
|
|
|
findNodes(node, names, nodes); //所有的 字:句_419D
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (const auto &node : nodes) {
|
|
|
|
|
names.clear();
|
|
|
|
|
names << "字:文本串_415B";
|
|
|
|
|
if (findNodeText(node, names, textContent)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Parses one entry of a zip archive into a DOM document.
 *
 * The archive is opened for unzipping if it is not open yet and is left open on
 * return. @p doc is cleared before the entry is parsed into it.
 *
 * @param zipFile  Archive to read from.
 * @param doc      Receives the parsed content of the entry.
 * @param fileName Name of the entry inside the archive.
 * @return true when the entry was found, opened and parsed; false otherwise.
 */
inline bool FileUtils::loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    if (!zipFile.isOpen()) {
        if (!zipFile.open(QuaZip::mdUnzip)) {
            return false;
        }
    }

    if (!zipFile.setCurrentFile(fileName)) {
        return false;
    }

    QuaZipFile entry(&zipFile);
    if (!entry.open(QIODevice::ReadOnly)) {
        return false;
    }

    doc.clear();
    const bool parsed = doc.setContent(&entry);
    entry.close();
    return parsed;
}
|
|
|
|
|
|
2022-05-27 16:07:09 +08:00
|
|
|
|
/**
|
|
|
|
|
* OFD文件解析
|
|
|
|
|
* @brief 参考: GB/T 33190-2016
|
|
|
|
|
* @param path
|
|
|
|
|
* @param textContent
|
|
|
|
|
*/
|
2022-10-26 18:01:40 +08:00
|
|
|
|
void FileUtils::getOFDTextContent(const QString &path, QString &textContent)
|
2022-05-27 16:07:09 +08:00
|
|
|
|
{
|
|
|
|
|
QFileInfo info = QFileInfo(path);
|
|
|
|
|
if (!info.exists() || info.isDir())
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
QuaZip zipfile(path);
|
|
|
|
|
if (!zipfile.open(QuaZip::mdUnzip))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// GB/T 33190-2016规范定义可以存在多个Doc_x目录,暂时只取第一个目录的内容
|
|
|
|
|
QString prefix("Doc_0/Pages/");
|
|
|
|
|
QStringList fileList;
|
|
|
|
|
for (const auto &file: zipfile.getFileNameList()) {
|
|
|
|
|
if (file.startsWith(prefix)) {
|
|
|
|
|
fileList << file;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < fileList.count(); ++i) {
|
|
|
|
|
QString filename = prefix + "Page_" + QString::number(i) + "/Content.xml";
|
|
|
|
|
if (!zipfile.setCurrentFile(filename)) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QuaZipFile fileR(&zipfile);
|
|
|
|
|
fileR.open(QIODevice::ReadOnly);
|
|
|
|
|
QXmlStreamReader reader(&fileR);
|
|
|
|
|
|
|
|
|
|
while (!reader.atEnd()) {
|
|
|
|
|
if (reader.readNextStartElement() && reader.name().toString() == "TextCode") {
|
|
|
|
|
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
|
|
|
|
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
|
|
|
|
|
fileR.close();
|
|
|
|
|
zipfile.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fileR.close();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
zipfile.close();
|
|
|
|
|
}
|