2021-01-29 11:43:07 +08:00
|
|
|
|
/*
|
|
|
|
|
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
*
|
|
|
|
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
|
|
|
|
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
|
|
|
|
|
* Modified by: zhangjiaping <zhangjiaping@kylinos.cn>
|
|
|
|
|
*
|
|
|
|
|
*/
|
2020-12-21 18:50:54 +08:00
|
|
|
|
#include "file-utils.h"
|
2021-01-22 09:49:44 +08:00
|
|
|
|
|
|
|
|
|
|
2021-01-10 09:23:02 +08:00
|
|
|
|
size_t FileUtils::_max_index_count = 0;
|
|
|
|
|
size_t FileUtils::_current_index_count = 0;
|
2021-01-22 17:15:43 +08:00
|
|
|
|
unsigned short FileUtils::_index_status = 0;
|
2021-04-16 15:35:54 +08:00
|
|
|
|
FileUtils::SearchMethod FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH;
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
|
2020-12-21 18:50:54 +08:00
|
|
|
|
|
|
|
|
|
// Default constructor: performs no work of its own.
FileUtils::FileUtils() {}
|
|
|
|
|
|
2020-12-26 08:56:38 +08:00
|
|
|
|
std::string FileUtils::makeDocUterm(QString path)
|
2020-12-21 18:50:54 +08:00
|
|
|
|
{
|
2021-02-02 14:05:32 +08:00
|
|
|
|
return QCryptographicHash::hash(path.toUtf8(),QCryptographicHash::Md5).toHex().toStdString();
|
2020-12-21 18:50:54 +08:00
|
|
|
|
}
|
2020-12-24 11:06:19 +08:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getFileIcon 获取文件图标
|
|
|
|
|
* @param uri "file:///home/xxx/xxx/xxxx.txt"格式
|
|
|
|
|
* @param checkValid
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid)
|
|
|
|
|
{
|
|
|
|
|
auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
|
|
|
|
|
auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
|
2021-01-09 11:25:07 +08:00
|
|
|
|
G_FILE_ATTRIBUTE_STANDARD_ICON,
|
|
|
|
|
G_FILE_QUERY_INFO_NONE,
|
|
|
|
|
nullptr,
|
|
|
|
|
nullptr));
|
2020-12-24 11:06:19 +08:00
|
|
|
|
if (!G_IS_FILE_INFO (info.get()->get()))
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return QIcon::fromTheme("unknown");
|
2020-12-24 11:06:19 +08:00
|
|
|
|
GIcon *g_icon = g_file_info_get_icon (info.get()->get());
|
|
|
|
|
QString icon_name;
|
|
|
|
|
//do not unref the GIcon from info.
|
|
|
|
|
if (G_IS_ICON(g_icon)) {
|
|
|
|
|
const gchar* const* icon_names = g_themed_icon_get_names(G_THEMED_ICON (g_icon));
|
|
|
|
|
if (icon_names) {
|
|
|
|
|
auto p = icon_names;
|
|
|
|
|
if (*p)
|
|
|
|
|
icon_name = QString (*p);
|
|
|
|
|
if (checkValid) {
|
|
|
|
|
while (*p) {
|
|
|
|
|
QIcon icon = QIcon::fromTheme(*p);
|
|
|
|
|
if (!icon.isNull()) {
|
|
|
|
|
icon_name = QString (*p);
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
p++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-12-25 19:16:44 +08:00
|
|
|
|
if (QIcon::fromTheme(icon_name).isNull()) {
|
|
|
|
|
return QIcon::fromTheme("unknown");
|
|
|
|
|
}
|
2020-12-24 11:06:19 +08:00
|
|
|
|
return QIcon::fromTheme(icon_name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getAppIcon 获取应用图标
|
|
|
|
|
* @param path .desktop文件的完整路径
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QIcon FileUtils::getAppIcon(const QString &path) {
|
|
|
|
|
QByteArray ba;
|
|
|
|
|
ba = path.toUtf8();
|
|
|
|
|
GKeyFile * keyfile;
|
|
|
|
|
keyfile = g_key_file_new();
|
|
|
|
|
if (!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)){
|
|
|
|
|
g_key_file_free (keyfile);
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return QIcon::fromTheme("unknown");
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
QString icon = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_ICON, NULL, NULL));
|
|
|
|
|
g_key_file_free(keyfile);
|
|
|
|
|
if (QIcon::fromTheme(icon).isNull()) {
|
|
|
|
|
return QIcon(":/res/icons/desktop.png");
|
|
|
|
|
}
|
|
|
|
|
return QIcon::fromTheme(icon);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getSettingIcon 获取设置图标
|
|
|
|
|
* @param setting 设置项传入参数,格式为 About/About->Properties
|
2020-12-25 19:16:44 +08:00
|
|
|
|
* @param is_white 选择是否返回白色图标
|
2020-12-24 11:06:19 +08:00
|
|
|
|
* @return
|
|
|
|
|
*/
|
2020-12-25 19:16:44 +08:00
|
|
|
|
QIcon FileUtils::getSettingIcon(const QString& setting, const bool& is_white) {
|
2020-12-24 11:06:19 +08:00
|
|
|
|
QString name = setting.left(setting.indexOf("/"));
|
2020-12-30 17:23:03 +08:00
|
|
|
|
if (! name.isEmpty()) {
|
|
|
|
|
name.replace(QString(name.at(0)), QString(name.at(0).toUpper()));
|
|
|
|
|
}
|
2020-12-25 19:16:44 +08:00
|
|
|
|
QString path;
|
|
|
|
|
if (is_white) {
|
|
|
|
|
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg(name);
|
|
|
|
|
} else {
|
|
|
|
|
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg(name);
|
|
|
|
|
}
|
2020-12-24 11:06:19 +08:00
|
|
|
|
QFile file(path);
|
|
|
|
|
if (file.exists()) {
|
|
|
|
|
return QIcon(path);
|
|
|
|
|
} else {
|
2021-01-14 15:19:25 +08:00
|
|
|
|
return QIcon::fromTheme("ukui-control-center"); //无插件图标时,返回控制面板应用图标
|
|
|
|
|
// if (is_white) {
|
|
|
|
|
// return QIcon(QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg("About"));
|
|
|
|
|
// } else {
|
|
|
|
|
// return QIcon(QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg("About"));
|
|
|
|
|
// }
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getFileName 获取文件名
|
2021-01-20 15:33:49 +08:00
|
|
|
|
* @param uri 格式为"file:///home/xxx/xxx/xxxx.txt"
|
2020-12-24 11:06:19 +08:00
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QString FileUtils::getFileName(const QString& uri) {
|
2021-01-20 15:33:49 +08:00
|
|
|
|
QFileInfo info(uri);
|
|
|
|
|
if (info.exists()) {
|
|
|
|
|
return info.fileName();
|
|
|
|
|
} else {
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return "Unknown File";
|
|
|
|
|
}
|
2021-01-20 15:33:49 +08:00
|
|
|
|
// QUrl url = uri;
|
|
|
|
|
// if (url.fileName().isEmpty()) {
|
|
|
|
|
// return "Unknown File";
|
|
|
|
|
// }
|
|
|
|
|
// return url.fileName();
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getAppName 获取应用名
|
|
|
|
|
* @param path .destop文件的完整路径
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QString FileUtils::getAppName(const QString& path) {
|
|
|
|
|
QByteArray ba;
|
|
|
|
|
ba = path.toUtf8();
|
|
|
|
|
GKeyFile * keyfile;
|
|
|
|
|
keyfile = g_key_file_new();
|
|
|
|
|
if (!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)){
|
|
|
|
|
g_key_file_free (keyfile);
|
|
|
|
|
return "Unknown App";
|
|
|
|
|
}
|
|
|
|
|
QString name = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_NAME, NULL, NULL));
|
|
|
|
|
g_key_file_free(keyfile);
|
|
|
|
|
return name;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getSettingName 获取设置项名
|
|
|
|
|
* @param setting 设置项传入参数,格式为 About/About->Properties
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
QString FileUtils::getSettingName(const QString& setting) {
|
2020-12-25 19:16:44 +08:00
|
|
|
|
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
2020-12-24 11:06:19 +08:00
|
|
|
|
}
|
2020-12-26 12:45:28 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void FileUtils::loadHanziTable(const QString &fileName)
|
|
|
|
|
{
|
|
|
|
|
QFile file(fileName);
|
|
|
|
|
if (!file.open(QFile::ReadOnly | QFile::Text)) {
|
|
|
|
|
qDebug("File: '%s' open failed!", file.fileName().toStdString().c_str());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* 读取汉字对照表文件并转换为QMap存储 */
|
|
|
|
|
while(!file.atEnd()) {
|
|
|
|
|
QString content = QString::fromUtf8(file.readLine());
|
|
|
|
|
FileUtils::map_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ").first().split(",");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
file.close();
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-04 14:10:00 +08:00
|
|
|
|
/**
 * @brief FileUtils::getMimetype Determine the MIME type of a file.
 * @param path Path of the file to inspect.
 * @return The type reported by QMimeDatabase using content-based matching
 *         (QMimeDatabase::MatchContent).
 */
QMimeType FileUtils::getMimetype(QString &path)
{
    return QMimeDatabase().mimeTypeForFile(path, QMimeDatabase::MatchContent);
}
|
|
|
|
|
|
2021-01-03 16:58:26 +08:00
|
|
|
|
//aborted
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QString FileUtils::find(const QString &hanzi)
|
|
|
|
|
{
|
2021-01-09 11:25:07 +08:00
|
|
|
|
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
|
|
|
|
|
// static QMap<QString, QStringList> map;
|
2020-12-26 12:45:28 +08:00
|
|
|
|
QString output;
|
|
|
|
|
QStringList stringList = hanzi.split("");
|
|
|
|
|
|
|
|
|
|
/* 遍历查找汉字-拼音对照表的内容并将汉字替换为拼音 */
|
|
|
|
|
for (const QString &str : stringList) {
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(str))
|
|
|
|
|
output += FileUtils::map_chinese2pinyin[str].first();
|
|
|
|
|
else
|
|
|
|
|
output += str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return output;
|
|
|
|
|
}
|
2020-12-29 16:41:30 +08:00
|
|
|
|
|
2020-12-30 15:56:23 +08:00
|
|
|
|
//DFS多音字太多直接GG
|
2020-12-29 19:30:48 +08:00
|
|
|
|
void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList){
|
2020-12-29 16:41:30 +08:00
|
|
|
|
if (hanzi.size() == 0){
|
2020-12-29 19:30:48 +08:00
|
|
|
|
resultList.append(resultAllPinYin);
|
|
|
|
|
resultList.append(resultFirst);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(hanzi.at(0))){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]){
|
2020-12-29 19:30:48 +08:00
|
|
|
|
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + i, resultFirst + i.at(0), resultList);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
2020-12-29 19:30:48 +08:00
|
|
|
|
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + hanzi.at(0), resultFirst + hanzi.at(0), resultList);
|
2020-12-29 16:41:30 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-30 15:56:23 +08:00
|
|
|
|
//BFS+Stack多音字太多会爆栈
|
|
|
|
|
void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList){
|
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString> tempQueue;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue.enqueue(i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
tempQueue.enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
while (tempHanzi.size() != 0) {
|
|
|
|
|
tempQueueSize = tempQueue.size();
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue.enqueue(tempQueue.head() + i);
|
|
|
|
|
}
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
|
|
|
|
while(!tempQueue.empty()){
|
|
|
|
|
resultList.append(tempQueue.dequeue());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//BFS+Heap,多音字太多会耗尽内存
|
|
|
|
|
void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList){
|
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue->enqueue(i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
tempQueue->enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
while (tempHanzi.size() != 0) {
|
|
|
|
|
tempQueueSize = tempQueue->size();
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue->enqueue(tempQueue->head() + i);
|
|
|
|
|
}
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
|
|
|
|
while(!tempQueue->empty()){
|
|
|
|
|
resultList.append(tempQueue->dequeue());
|
|
|
|
|
}
|
|
|
|
|
delete tempQueue;
|
|
|
|
|
tempQueue = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//BFS+Heap+超过3个多音字只建一个索引,比较折中的方案
|
|
|
|
|
void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList){
|
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
|
|
|
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
int multiToneWordNum = 0;
|
|
|
|
|
for (auto i : hanzi){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(i)){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin[i].size() > 1){
|
|
|
|
|
++multiToneWordNum;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (multiToneWordNum > 3){
|
|
|
|
|
QString oneResult, oneResultFirst;
|
|
|
|
|
for (auto i : hanzi){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(i)){
|
|
|
|
|
oneResult += FileUtils::map_chinese2pinyin[i].first();
|
|
|
|
|
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
oneResult += i;
|
|
|
|
|
oneResultFirst += i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
resultList.append(oneResult);
|
|
|
|
|
resultList.append(oneResultFirst);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue->enqueue(i);
|
|
|
|
|
tempQueueFirst->enqueue(i.at(0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
tempQueue->enqueue(tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst->enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
while (tempHanzi.size() != 0) {
|
|
|
|
|
tempQueueSize = tempQueue->size();
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue->enqueue(tempQueue->head() + i);
|
|
|
|
|
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
tempQueueFirst->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue->dequeue();
|
|
|
|
|
tempQueueFirst->dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
|
|
|
|
while(!tempQueue->empty()){
|
|
|
|
|
resultList.append(tempQueue->dequeue());
|
|
|
|
|
resultList.append(tempQueueFirst->dequeue());
|
|
|
|
|
}
|
|
|
|
|
delete tempQueue;
|
|
|
|
|
delete tempQueueFirst;
|
|
|
|
|
tempQueue = nullptr;
|
|
|
|
|
tempQueueFirst = nullptr;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-03 16:58:26 +08:00
|
|
|
|
//BFS+Stack+超过3个多音字只建一个索引,比较折中的方案
|
|
|
|
|
void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& resultList){
|
|
|
|
|
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
|
|
|
QQueue<QString> tempQueue;
|
|
|
|
|
QQueue<QString> tempQueueFirst;
|
|
|
|
|
tempHanzi = hanzi;
|
|
|
|
|
int tempQueueSize = 0;
|
|
|
|
|
int multiToneWordNum = 0;
|
|
|
|
|
for (auto i : hanzi){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(i)){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin[i].size() > 1){
|
|
|
|
|
++multiToneWordNum;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (multiToneWordNum > 3){
|
|
|
|
|
QString oneResult, oneResultFirst;
|
|
|
|
|
for (auto i : hanzi){
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(i)){
|
|
|
|
|
oneResult += FileUtils::map_chinese2pinyin[i].first();
|
|
|
|
|
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
oneResult += i;
|
|
|
|
|
oneResultFirst += i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
resultList.append(oneResult);
|
|
|
|
|
resultList.append(oneResultFirst);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue.enqueue(i);
|
|
|
|
|
tempQueueFirst.enqueue(i.at(0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
tempQueue.enqueue(tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst.enqueue(tempHanzi.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
while (tempHanzi.size() != 0) {
|
|
|
|
|
tempQueueSize = tempQueue.size();
|
|
|
|
|
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
|
|
|
|
|
tempQueue.enqueue(tempQueue.head() + i);
|
|
|
|
|
tempQueueFirst.enqueue(tempQueueFirst.head() + i.at(0));
|
|
|
|
|
}
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
tempQueueFirst.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else{
|
|
|
|
|
for (int j = 0; j < tempQueueSize; ++j){
|
|
|
|
|
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueueFirst.enqueue(tempQueueFirst.head() + tempHanzi.at(0));
|
|
|
|
|
tempQueue.dequeue();
|
|
|
|
|
tempQueueFirst.dequeue();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
|
|
|
}
|
|
|
|
|
while(!tempQueue.empty()){
|
|
|
|
|
resultList.append(tempQueue.dequeue());
|
|
|
|
|
resultList.append(tempQueueFirst.dequeue());
|
|
|
|
|
}
|
2021-01-09 11:25:07 +08:00
|
|
|
|
// delete tempQueue;
|
|
|
|
|
// delete tempQueueFirst;
|
|
|
|
|
// tempQueue = nullptr;
|
|
|
|
|
// tempQueueFirst = nullptr;
|
2021-01-03 16:58:26 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-29 16:41:30 +08:00
|
|
|
|
/**
 * @brief FileUtils::findMultiToneWords Produce the pinyin spellings of a text.
 *
 * Delegates to stitchMultiToneWordsBFSStackLess3(), which appends each
 * full-pinyin spelling followed by its first-letter form.
 *
 * @param hanzi Text to convert.
 * @return The generated spellings; an empty list for empty input, which is
 *         no longer forwarded to the helper.
 */
QStringList FileUtils::findMultiToneWords(const QString& hanzi)
{
    QStringList output;
    // Guard here so the helper never indexes into an empty string.
    if (hanzi.isEmpty()) {
        return output;
    }

    stitchMultiToneWordsBFSStackLess3(hanzi, output);
    return output;
}
|
|
|
|
|
|
2020-12-29 20:18:36 +08:00
|
|
|
|
/**
|
|
|
|
|
* @brief FileUtils::getDocxTextContent
|
|
|
|
|
* @param path: abs path
|
|
|
|
|
* @return docx to QString
|
|
|
|
|
*/
|
2021-01-12 16:07:50 +08:00
|
|
|
|
void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
|
2020-12-29 20:18:36 +08:00
|
|
|
|
{
|
2021-04-08 16:11:58 +08:00
|
|
|
|
//fix me :optimized by xpath??
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QFileInfo info = QFileInfo(path);
|
|
|
|
|
if(!info.exists()||info.isDir())
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-31 21:14:13 +08:00
|
|
|
|
QuaZip file(path);
|
|
|
|
|
if(!file.open(QuaZip::mdUnzip))
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-29 20:18:36 +08:00
|
|
|
|
|
2020-12-31 21:14:13 +08:00
|
|
|
|
if(!file.setCurrentFile("word/document.xml",QuaZip::csSensitive))
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QuaZipFile fileR(&file);
|
|
|
|
|
|
|
|
|
|
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
|
|
|
|
|
|
|
|
|
QDomDocument doc;
|
|
|
|
|
doc.setContent(fileR.readAll());
|
2021-04-08 16:11:58 +08:00
|
|
|
|
fileR.close();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
QDomElement first = doc.firstChildElement("w:document");
|
2021-01-19 20:59:46 +08:00
|
|
|
|
QDomElement body = first.firstChildElement("w:body");
|
|
|
|
|
while(!body.isNull())
|
2020-12-29 20:18:36 +08:00
|
|
|
|
{
|
2021-01-19 20:59:46 +08:00
|
|
|
|
QDomElement wp= body.firstChildElement("w:p");
|
|
|
|
|
while(!wp.isNull())
|
2020-12-29 20:18:36 +08:00
|
|
|
|
{
|
2021-01-19 20:59:46 +08:00
|
|
|
|
QDomElement wr= wp.firstChildElement("w:r");
|
|
|
|
|
while(!wr.isNull())
|
|
|
|
|
{
|
|
|
|
|
QDomElement wt = wr.firstChildElement("w:t");
|
|
|
|
|
textcontent.append(wt.text().replace("\n",""));
|
2021-04-08 16:11:58 +08:00
|
|
|
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
|
2021-03-16 17:21:10 +08:00
|
|
|
|
{
|
|
|
|
|
file.close();
|
|
|
|
|
return;
|
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
wr = wr.nextSiblingElement();
|
|
|
|
|
}
|
|
|
|
|
wp = wp.nextSiblingElement();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
body = body.nextSiblingElement();
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
2021-01-19 20:59:46 +08:00
|
|
|
|
file.close();
|
2021-01-12 16:07:50 +08:00
|
|
|
|
return;
|
2020-12-29 20:18:36 +08:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-08 16:11:58 +08:00
|
|
|
|
/**
 * @brief FileUtils::getPptxTextContent Extract the text of a .pptx file.
 *
 * Opens the file as a zip archive, counts the entries whose name starts with
 * "ppt/slides/slide" and then reads "ppt/slides/slide<k>.xml" for k = 1..count,
 * skipping names that are not present. In each slide it walks
 * p:sld -> p:cSld -> p:spTree -> p:sp -> p:txBody -> a:p -> a:r and appends
 * the text of the first a:t child of each visited element with '\r' and '\t'
 * removed. Extraction stops as soon as the accumulated text reaches
 * MAX_CONTENT_LENGTH/3 characters.
 *
 * Fix: the tab removal used to be applied to the whole of @p textcontent
 * after every append (misplaced parenthesis), rescanning all accumulated
 * text for each run. It is now applied to the run's own text only, matching
 * getXlsxTextContent().
 *
 * @param path Path of the presentation.
 * @param textcontent Output; extracted text is appended to it.
 */
void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
{
    const QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }

    // Count the slide parts; only the count is needed below.
    const QString prefix("ppt/slides/slide");
    const QStringList entries = file.getFileNameList();
    int slideCount = 0;
    for (const QString &entry : entries) {
        if (entry.startsWith(prefix)) {
            ++slideCount;
        }
    }
    if (slideCount == 0) {
        return;
    }

    QDomDocument doc;
    for (int i = 0; i < slideCount; ++i) {
        const QString name = prefix + QString::number(i + 1) + ".xml";
        if (!file.setCurrentFile(name)) {
            continue;
        }

        QuaZipFile fileR(&file);
        fileR.open(QIODevice::ReadOnly);
        doc.clear();
        doc.setContent(fileR.readAll());
        fileR.close();

        // FIXME: could this be optimized with XPath?
        // Original author's note: collecting every "a:t" via
        // elementsByTagName() looks nicer but is slower on large XML with many
        // irrelevant nodes; this explicit walk is uglier but seemed more
        // efficient on large files.
        QDomElement sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
        for (; !sptree.isNull(); sptree = sptree.nextSiblingElement()) {
            for (QDomElement sp = sptree.firstChildElement("p:sp"); !sp.isNull(); sp = sp.nextSiblingElement()) {
                for (QDomElement txbody = sp.firstChildElement("p:txBody"); !txbody.isNull(); txbody = txbody.nextSiblingElement()) {
                    for (QDomElement ap = txbody.firstChildElement("a:p"); !ap.isNull(); ap = ap.nextSiblingElement()) {
                        for (QDomElement ar = ap.firstChildElement("a:r"); !ar.isNull(); ar = ar.nextSiblingElement()) {
                            const QDomElement at = ar.firstChildElement("a:t");
                            // Clean the run's text before appending it.
                            textcontent.append(at.text().replace("\r","").replace("\t",""));
                            if (textcontent.length() >= MAX_CONTENT_LENGTH/3) {
                                // Enough text collected: stop early.
                                file.close();
                                return;
                            }
                        }
                    }
                }
            }
        }
    }

    file.close();
}
|
|
|
|
|
|
2021-04-13 14:53:55 +08:00
|
|
|
|
/**
 * @brief FileUtils::getXlsxTextContent Extract the shared strings of a .xlsx
 *        file.
 *
 * Opens the file as a zip archive, parses "xl/sharedStrings.xml" and, for
 * every "si" item under "sst", appends the text of its "t" element (taken
 * from the first "r" child when one exists, otherwise from the item itself)
 * with '\r' and '\n' removed. Extraction stops as soon as the accumulated
 * text reaches MAX_CONTENT_LENGTH/3 characters.
 *
 * Fix: an item without a "t" element used to `continue` without advancing to
 * the next sibling, so the loop never terminated. Such items are now skipped.
 *
 * @param path Path of the workbook.
 * @param textcontent Output; extracted text is appended to it.
 */
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
{
    const QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }
    if (!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
        return;
    }

    // Read the whole shared-strings part and parse it as XML.
    QuaZipFile fileR(&file);
    fileR.open(QIODevice::ReadOnly);
    QDomDocument doc;
    doc.setContent(fileR.readAll());
    fileR.close();

    // The for-loop increments run on `continue`, so every path advances.
    for (QDomElement sst = doc.firstChildElement("sst"); !sst.isNull(); sst = sst.nextSiblingElement()) {
        for (QDomElement si = sst.firstChildElement("si"); !si.isNull(); si = si.nextSiblingElement()) {
            const QDomElement r = si.firstChildElement("r");
            const QDomElement t = r.isNull() ? si.firstChildElement("t")
                                             : r.firstChildElement("t");
            if (t.isNull()) {
                continue;
            }

            textcontent.append(t.text().replace("\r","").replace("\n",""));
            if (textcontent.length() >= MAX_CONTENT_LENGTH/3) {
                // Enough text collected: stop early.
                file.close();
                return;
            }
        }
    }

    file.close();
}
|
|
|
|
|
|
2021-04-15 09:19:36 +08:00
|
|
|
|
/**
 * @brief FileUtils::getPdfTextContent Extract the text of a PDF file.
 *
 * Appends the text of each page, with '\n' removed, until all pages are read
 * or the accumulated text reaches MAX_CONTENT_LENGTH/3 characters.
 *
 * Fixes over the previous implementation: a failed load (null document) was
 * dereferenced, a locked document was returned from without being deleted,
 * and the Page objects returned by Document::page() - owned by the caller
 * per the Poppler Qt5 documentation - were never deleted or null-checked.
 *
 * @param path Path of the PDF file.
 * @param textcontent Output; extracted text is appended to it. It is left
 *        untouched when the document cannot be loaded or is locked.
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent)
{
    Poppler::Document *doc = Poppler::Document::load(path);
    if (doc == nullptr) {
        // Not a readable PDF.
        return;
    }
    if (doc->isLocked()) {
        delete doc;
        return;
    }

    // A default-constructed (null) rectangle is passed to Page::text().
    const QRectF wholePage;
    const int pageNum = doc->numPages();
    for (int i = 0; i < pageNum; ++i) {
        Poppler::Page *page = doc->page(i);
        if (page == nullptr) {
            continue;
        }
        textcontent.append(page->text(wholePage).replace("\n",""));
        delete page;
        if (textcontent.length() >= MAX_CONTENT_LENGTH/3) {
            break;
        }
    }

    delete doc;
}
|
|
|
|
|
|
2021-01-12 16:07:50 +08:00
|
|
|
|
/**
 * @brief FileUtils::getTxtContent Read the beginning of a text file as
 *        decoded text.
 *
 * Reads at most MAX_CONTENT_LENGTH bytes, asks uchardet for the charset of
 * those bytes, decodes them through a QTextStream configured with the
 * detected charset name and stores the result with '\n' removed. Warnings are
 * logged when detection fails or Qt has no codec for the detected name;
 * decoding is attempted regardless.
 *
 * @param path Path of the text file.
 * @param textcontent Output; overwritten with the decoded text. It is left
 *        untouched when the file cannot be opened.
 */
void FileUtils::getTxtContent(QString &path, QString &textcontent)
{
    QFile file(path);
    if (!file.open(QIODevice::ReadOnly|QIODevice::Text)) {
        return;
    }
    const QByteArray rawBytes = file.read(MAX_CONTENT_LENGTH);
    file.close();

    // Feed the bytes to the charset detector.
    uchardet_t detector = uchardet_new();
    const int detectStatus = uchardet_handle_data(detector, rawBytes.constData(), rawBytes.size());
    if (detectStatus != 0) {
        qWarning()<<"Txt file encoding format detect fail!"<<path;
    }
    uchardet_data_end(detector);

    // charsetName is only used before the detector is deleted below.
    const char *charsetName = uchardet_get_charset(detector);
    const bool codecKnown = (QTextCodec::codecForName(charsetName) != 0);
    if (!codecKnown) {
        qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(charsetName);
    }

    QTextStream stream(rawBytes, QIODevice::ReadOnly);
    stream.setCodec(charsetName);
    uchardet_delete(detector);

    textcontent = stream.readAll().replace("\n","");
}
|