ukui-search/libsearch/file-utils.cpp

1427 lines
45 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangjiaping <zhangjiaping@kylinos.cn>
*
*/
#include "file-utils.h"
#include <QXmlStreamReader>
#include <QMutexLocker>
#include <gio/gdesktopappinfo.h>
#include <QDBusMessage>
#include <QDBusConnection>
#include <QDomDocument>
#include <QDBusInterface>
#include <QDBusReply>
#include <QDesktopServices>
#include <QMimeDatabase>
#include <QCryptographicHash>
#include <QFileInfo>
#include <QFile>
#include <QApplication>
#include <QDir>
#include <QDebug>
#include <QUrl>
#include <QDomElement>
#include <QClipboard>
#include <QQueue>
#include <QFontMetrics>
#include <quazip5/quazipfile.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <quazip5/quazip.h>
#include <uchardet/uchardet.h>
#include <poppler/qt5/poppler-qt5.h>
#include "gobject-template.h"
#include "hanzi-to-pinyin.h"
#include "common.h"
// Pull the project namespace into scope for the definitions in this file.
using namespace UkuiSearch;
// Size limit shared by the text extractors in this file: getTxtContent() reads at most this many
// bytes, and the other extractors stop once the collected text reaches MAX_CONTENT_LENGTH / 3 characters.
#define MAX_CONTENT_LENGTH 20480000
// Locked by getFileIcon() and iconFromTheme() for the duration of their icon lookups.
static QMutex iconMutex;
/**
* @brief 查找elem的子节点
* @param elem 起始节点
* @param names 名称链
* @param nodes 查找到的全部结果
*/
void findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
{
QString targetName = names.dequeue();
QDomNode node = elem.firstChild();
while (!node.isNull()) {
QDomElement e = node.toElement();
if (!e.isNull() && e.tagName() == targetName) {
if (names.empty()) {
nodes.append(e);
} else {
findNodes(e, names, nodes);
break;
}
}
node = node.nextSibling();
}
}
/**
 * @brief Runs findNodes() and then filters nodes by an attribute value.
 *
 * After the lookup, every entry of nodes that lacks attr, or whose attr value is not listed in
 * values, is removed. The filter is applied to the whole list, including entries that were
 * already present before the call.
 */
void findNodesByAttr(const QDomElement &elem, QQueue <QString> &names, QList <QDomElement> &nodes, const QString &attr, const QStringList &values)
{
    findNodes(elem, names, nodes);
    for (auto cursor = nodes.begin(); cursor != nodes.end();) {
        const bool keep = cursor->hasAttribute(attr) && values.contains(cursor->attribute(attr));
        if (keep) {
            ++cursor;
        } else {
            cursor = nodes.erase(cursor);
        }
    }
}
bool findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
{
QList<QDomElement> nodes;
findNodes(elem, names, nodes);
for (const auto &node : nodes) {
content.append(node.text());
if (content.length() >= MAX_CONTENT_LENGTH / 3) {
return true;
}
}
return false;
}
void findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
{
QList<QDomElement> nodes;
findNodes(elem, names, nodes);
for (const auto &node : nodes) {
if (node.hasAttribute(attr)) {
attrs.append(node.attribute(attr));
}
}
}
void processUOFPPT(const QDomDocument &doc, QString &content)
{
QDomElement rootElem = doc.documentElement();
QList<QDomElement> nodes;
QQueue<QString> names; //每个节点的名称
names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
findNodes(rootElem, names, nodes);
if (nodes.empty()) {
//TODO 在uof-ppt不存在锚点节点时直接查找文本节点
return;
}
QStringList objs;
//每一个 演:幻灯片 -> 锚点
for (const auto &node : nodes) {
names.clear();
names << "uof:锚点";
findNodeAttr(node, names, "uof:图形引用", objs);
}
nodes.clear();
names.clear();
names << "uof:对象集" << "图:图形";
findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
if (nodes.empty()) {
return;
}
QList<QDomElement> paraNodes; //全部段落节点
for (const auto &node : nodes) {
names.clear();
names << "图:文本内容" << "字:段落";
findNodes(node, names, paraNodes);
}
nodes.clear();
for (const auto &node : paraNodes) {
names.clear();
names << "字:句";
findNodes(node, names, nodes); //全部段落下的全部句节点
}
for (const auto &node : nodes) {
names.clear();
names << "字:文本串";
if (findNodeText(node, names, content)) {
break;
}
}
}
/**
 * @brief Parses one entry of a zip archive into a DOM document.
 * @param zipFile  archive; opened for unzipping here if it is not open yet
 * @param doc      cleared and then filled with the parsed entry
 * @param fileName name of the entry inside the archive
 * @return true if the entry was found, opened and parsed; false otherwise
 */
bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    const bool archiveReady = zipFile.isOpen() || zipFile.open(QuaZip::mdUnzip);
    if (!archiveReady || !zipFile.setCurrentFile(fileName)) {
        return false;
    }

    QuaZipFile entry(&zipFile);
    if (!entry.open(QIODevice::ReadOnly)) {
        return false;
    }

    doc.clear();
    const bool parsed = doc.setContent(&entry);
    entry.close();
    return parsed;
}
// The constructor performs no work of its own.
FileUtils::FileUtils() = default;
/**
 * @brief Builds a term for a document from its path.
 * @param path path of the document
 * @return hex-encoded MD5 digest of the UTF-8 encoded path
 */
std::string FileUtils::makeDocUterm(QString path) {
    const QByteArray digest = QCryptographicHash::hash(path.toUtf8(), QCryptographicHash::Md5);
    const QByteArray hex = digest.toHex();
    return hex.toStdString();
}
/**
 * @brief FileUtils::getFileIcon Looks up the icon of a file.
 *
 * The file's standard icon is queried through GIO and the first of its themed icon names that
 * the current Qt icon theme can resolve is returned. The lookup runs while iconMutex is held.
 *
 * @param uri file URI, e.g. "file:///home/xxx/xxx/xxxx.txt"
 * @param checkValid unused
 * @return the themed icon, or the "unknown" icon when the file cannot be queried, its icon is
 *         not a themed icon, or none of the icon names resolves
 */
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid) {
    Q_UNUSED(checkValid)
    QMutexLocker locker(&iconMutex);

    // Built lazily so the fallback lookup only happens on the paths that need it.
    const auto unknownIcon = []() {
        return QIcon::fromTheme("unknown", QIcon(":/res/icons/unknown.svg"));
    };

    auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
    auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                G_FILE_ATTRIBUTE_STANDARD_ICON,
                                                G_FILE_QUERY_INFO_NONE,
                                                nullptr,
                                                nullptr));
    if (!G_IS_FILE_INFO(info.get()->get())) {
        return unknownIcon();
    }

    // The GIcon is owned by info; do not unref it.
    GIcon *g_icon = g_file_info_get_icon(info.get()->get());
    // Only themed icons carry a name list; other GIcon implementations must not be cast to
    // GThemedIcon, so they fall through to the default icon.
    if (!G_IS_THEMED_ICON(g_icon)) {
        return unknownIcon();
    }

    const gchar *const *icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
    for (const gchar *const *p = icon_names; p && *p; ++p) {
        const QIcon icon = QIcon::fromTheme(*p);
        if (!icon.isNull()) {
            return icon;
        }
    }
    return unknownIcon();
}
/**
 * @brief FileUtils::getAppIcon Looks up the icon of an application.
 * @param path full path of the application's .desktop file
 * @return the themed icon named by the desktop entry's Icon key; the bundled desktop.png when
 *         that name does not resolve; the "unknown" icon when the .desktop file cannot be loaded
 */
QIcon FileUtils::getAppIcon(const QString &path) {
    const QByteArray ba = path.toUtf8();
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, ba.constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return QIcon::fromTheme("unknown", QIcon(":/res/icons/unknown.svg"));
    }

    // The returned string is newly allocated and owned by the caller: copy it into a QString
    // and release it, otherwise it leaks on every call.
    gchar *rawIcon = g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_ICON, nullptr, nullptr);
    const QString icon = QString::fromUtf8(rawIcon);
    g_free(rawIcon);
    g_key_file_free(keyfile);

    // Resolve the theme icon once instead of looking it up twice.
    const QIcon themed = QIcon::fromTheme(icon);
    if (themed.isNull()) {
        return QIcon(":/res/icons/desktop.png");
    }
    return themed;
}
/**
 * @brief FileUtils::getSettingIcon Looks up the icon of a settings entry.
 *
 * The part of setting before the first "/" is taken as the plugin name, its first character is
 * upper-cased, and the matching SVG is looked up in the control center's resource directory.
 *
 * @param setting settings entry, formatted like About/About->Properties
 * @param is_white whether to return the white variant of the icon
 * @return the plugin icon if the file exists, otherwise the control center application icon
 */
QIcon FileUtils::getSettingIcon(const QString &setting, const bool is_white) {
    QString name = setting.left(setting.indexOf("/"));
    if (!name.isEmpty()) {
        // Capitalise only the first character. A string replace would also upper-case every
        // later occurrence of that letter (e.g. "area" -> "AreA").
        name[0] = name.at(0).toUpper();
    }

    const QString path = is_white
            ? QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg(name)
            : QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg(name);

    if (QFile(path).exists()) {
        return QIcon(path);
    }
    // No plugin icon available: fall back to the control center application icon.
    return QIcon::fromTheme("ukui-control-center", QIcon(":/res/icons/ukui-control-center.svg"));
}
/**
 * @brief Returns the control center application icon, taken from the icon theme with the
 *        bundled SVG as fallback.
 */
QIcon FileUtils::getSettingIcon() {
    const QIcon bundled(":/res/icons/ukui-control-center.svg");
    return QIcon::fromTheme("ukui-control-center", bundled);
}
/**
 * @brief FileUtils::getFileName Returns the file name of an existing file.
 *
 * Note that the argument is handed to QFileInfo unchanged, i.e. it is treated as a
 * filesystem path.
 *
 * @param uri location of the file
 * @return the file name, or "Unknown File" if QFileInfo reports that the file does not exist
 */
QString FileUtils::getFileName(const QString &uri) {
    const QFileInfo target(uri);
    if (!target.exists()) {
        return "Unknown File";
    }
    return target.fileName();
}
/**
 * @brief FileUtils::getAppName Returns the display name of an application.
 * @param path full path of the application's .desktop file
 * @return the localized value of the desktop entry's Name key (empty if the key is missing),
 *         or "Unknown App" when the .desktop file cannot be loaded
 */
QString FileUtils::getAppName(const QString &path) {
    const QByteArray ba = path.toUtf8();
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, ba.constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return "Unknown App";
    }

    // The returned string is newly allocated and owned by the caller: copy it into a QString
    // and release it, otherwise it leaks on every call.
    gchar *rawName = g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_NAME, nullptr, nullptr);
    const QString name = QString::fromUtf8(rawName);
    g_free(rawName);
    g_key_file_free(keyfile);
    return name;
}
/**
 * @brief FileUtils::getSettingName Returns the name part of a settings entry.
 * @param setting settings entry, formatted like About/About->Properties
 * @return everything after the last "/" (the whole string when it contains no "/")
 */
QString FileUtils::getSettingName(const QString &setting) {
    const int lastSlash = setting.lastIndexOf("/");
    const int tailLength = setting.length() - lastSlash - 1;
    return setting.right(tailLength);
}
/**
 * @brief Tells whether pathA is pathB itself or is located below it.
 * @param pathA path to test
 * @param pathB candidate ancestor; "/" accepts every pathA
 * @return true if pathA equals pathB or starts with pathB followed by "/"
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    if (pathB == "/") {
        return true;
    }
    if (pathA.length() < pathB.length()) {
        return false;
    }
    return pathA == pathB || pathA.startsWith(pathB + "/");
}
/**
 * @brief Determines the MIME type of a file using QMimeDatabase in MatchContent mode.
 * @param path path of the file
 */
QMimeType FileUtils::getMimetype(const QString &path) {
    return QMimeDatabase().mimeTypeForFile(path, QMimeDatabase::MatchContent);
}
/**
 * @brief Derives two search strings from the results HanZiToPinYin produces for hanzi.
 * @param hanzi input text
 * @return a two-element list: all results joined without separator, followed by the
 *         concatenated first characters of the non-empty results
 */
QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
    QStringList results;
    HanZiToPinYin::getInstance()->getResults(hanzi.toStdString(), results);

    QString initials;
    for (int i = 0; i < results.size(); ++i) {
        const QString &entry = results.at(i);
        if (entry.isEmpty()) {
            continue;
        }
        initials += entry.at(0);
    }

    QStringList output;
    output << results.join("") << initials;
    return output;
}
/**
* @brief FileUtils::getDocxTextContent
* @param path: abs path
* @return docx to QString
*/
void FileUtils::getDocxTextContent(const QString &path, QString &textcontent) {
//fix me :optimized by xpath??
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
file.close();
return;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/* //原加载DOM文档方式
QDomDocument doc;
doc.setContent(fileR.readAll());
fileR.close();
QDomElement first = doc.firstChildElement("w:document");
QDomElement body = first.firstChildElement("w:body");
while(!body.isNull()) {
QDomElement wp = body.firstChildElement("w:p");
while(!wp.isNull()) {
QDomElement wr = wp.firstChildElement("w:r");
while(!wr.isNull()) {
QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n", "")).replace("\r", " ");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
wr = wr.nextSiblingElement();
}
wp = wp.nextSiblingElement();
}
body = body.nextSiblingElement();
}
file.close();
return;
*/
}
void FileUtils::getPptxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
QStringList fileList;
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix))
fileList << i;
}
if(fileList.isEmpty()) {
file.close();
return;
}
for(int i = 0; i < fileList.size(); ++i){
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
}
file.close();
return;
/*
QDomElement sptree;
QDomElement sp;
QDomElement txbody;
QDomElement ap;
QDomElement ar;
QDomDocument doc;
QDomElement at;
// QDomNodeList atList;
for(int i = 0; i < fileList.size(); ++i) {
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
doc.clear();
doc.setContent(fileR.readAll());
fileR.close();
//fix me :optimized by xpath??
//This method looks better but slower,
//If xml file is very large with many useless node,this method will take a lot of time.
// atList = doc.elementsByTagName("a:t");
// for(int i = 0; i<atList.size(); ++i)
// {
// at = atList.at(i).toElement();
// if(!at.isNull())
// {
// textcontent.append(at.text().replace("\r","")).replace("\t"," ");
// if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
// {
// file.close();
// return;
// }
// }
// }
//This is ugly but seems more efficient when handel a large file.
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
while(!sptree.isNull()) {
sp = sptree.firstChildElement("p:sp");
while(!sp.isNull()) {
txbody = sp.firstChildElement("p:txBody");
while(!txbody.isNull()) {
ap = txbody.firstChildElement("a:p");
while(!ap.isNull()) {
ar = ap.firstChildElement("a:r");
while(!ar.isNull()) {
at = ar.firstChildElement("a:t");
textcontent.append(at.text().replace("\r", "")).replace("\t", "");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
ar = ar.nextSiblingElement();
}
ap = ap.nextSiblingElement();
}
txbody = txbody.nextSiblingElement();
}
sp = sp.nextSiblingElement();
}
sptree = sptree.nextSiblingElement();
}
}
file.close();
return;
*/
}
void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
file.close();
return;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/*
QDomDocument doc;
doc.setContent(fileR.readAll());
fileR.close();
QDomElement sst = doc.firstChildElement("sst");
QDomElement si;
QDomElement r;
QDomElement t;
while(!sst.isNull()) {
si = sst.firstChildElement("si");
while(!si.isNull()) {
r = si.firstChildElement("r");
if(r.isNull()) {
t = si.firstChildElement("t");
} else {
t = r.firstChildElement("t");
}
if(t.isNull())
continue;
textcontent.append(t.text().replace("\r", "").replace("\n", ""));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
si = si.nextSiblingElement();
}
sst = sst.nextSiblingElement();
}
file.close();
return;
*/
}
/**
 * @brief Extracts the text of a PDF file.
 *
 * Appends the text of each page to textcontent, with "\n" removed and "\r" replaced by a
 * space, and stops once textcontent has reached MAX_CONTENT_LENGTH / 3 characters. Nothing is
 * appended when the document cannot be loaded or is locked.
 *
 * @param path path of the file
 * @param textcontent receives the extracted text (appended)
 */
void FileUtils::getPdfTextContent(const QString &path, QString &textcontent) {
    Poppler::Document *doc = Poppler::Document::load(path);
    // load() yields a null pointer for files it cannot open; that must not be dereferenced.
    if (!doc) {
        return;
    }
    if (doc->isLocked()) {
        delete doc;
        return;
    }

    const QRectF qf;    // default-constructed rectangle, passed to Page::text() unchanged
    const int pageNum = doc->numPages();
    for (int i = 0; i < pageNum; ++i) {
        Poppler::Page *page = doc->page(i);
        if (!page) {
            continue;
        }
        textcontent.append(page->text(qf).replace("\n", "").replace("\r", " "));
        delete page;
        if (textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
            break;
        }
    }
    delete doc;
}
void FileUtils::getTxtContent(const QString &path, QString &textcontent) {
QFile file(path);
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
return;
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0)
qWarning() << "Txt file encoding format detect fail!" << path;
uchardet_data_end(chardet);
const char *codec = uchardet_get_charset(chardet);
if(QTextCodec::codecForName(codec) == 0)
qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(codec);
QTextStream stream(encodedString, QIODevice::ReadOnly);
stream.setCodec(codec);
uchardet_delete(chardet);
textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
file.close();
encodedString.clear();
chardet = NULL;
stream.flush();
return;
}
/**
 * @brief Opens a file, or shows it in the file manager.
 *
 * With openInDir set, the org.freedesktop.FileManager1 "ShowItems" method is called over the
 * session bus. Otherwise the default application for the file's content type is resolved
 * (mimeapps.list first, GIO's default as fallback); if one exists, the file is launched through
 * the com.kylin.AppManager D-Bus service, falling back to QDesktopServices::openUrl().
 *
 * @param path local path of the file
 * @param openInDir true to reveal the file in the file manager instead of opening it
 * @return 0 on success; -1 if the ShowItems call did not produce a reply or no default
 *         application could be resolved
 */
int FileUtils::openFile(QString &path, bool openInDir)
{
    if (openInDir) {
        QStringList list;
        list.append(path);
        QDBusMessage message = QDBusMessage::createMethodCall("org.freedesktop.FileManager1",
                                                              "/org/freedesktop/FileManager1",
                                                              "org.freedesktop.FileManager1",
                                                              "ShowItems");
        message.setArguments({list, "ukui-search"});
        const QDBusMessage messageRes = QDBusConnection::sessionBus().call(message);
        // Inspect the type of the message that actually came back; comparing the enum constant
        // with itself would report success for every call.
        if (messageRes.type() == QDBusMessage::ReplyMessage) {
            return 0;
        }
        qDebug() << "Error! QDBusMessage reply error! ReplyMessage:" << messageRes.type() << messageRes.errorMessage();
        return -1;
    }

    auto file = wrapGFile(g_file_new_for_uri(QUrl::fromLocalFile(path).toString().toUtf8().constData()));
    auto fileInfo = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                    "standard::*," "time::*," "access::*," "mountable::*," "metadata::*," "trash::*," G_FILE_ATTRIBUTE_ID_FILE,
                                                    G_FILE_QUERY_INFO_NONE,
                                                    nullptr,
                                                    nullptr));
    QString mimeType = g_file_info_get_content_type(fileInfo.get()->get());
    if (mimeType.isEmpty()) {
        if (g_file_info_has_attribute(fileInfo.get()->get(), "standard::fast-content-type")) {
            mimeType = g_file_info_get_attribute_string(fileInfo.get()->get(), "standard::fast-content-type");
        }
    }

    /*
     * g_app_info_get_default_for_type may return the wrong default application, so the default
     * is read from mimeapps.list first and GIO is only used as a fallback.
     */
    GAppInfo *info = nullptr;
    const QString mimeAppsListPath = QStandardPaths::writableLocation(QStandardPaths::HomeLocation)
                                     + "/.config/mimeapps.list";
    GKeyFile *keyfile = g_key_file_new();
    GError *error = nullptr;
    if (!g_key_file_load_from_file(keyfile, mimeAppsListPath.toUtf8().constData(), G_KEY_FILE_NONE, &error)) {
        qWarning() << "load mimeapps list error msg" << error->message;
        g_clear_error(&error);
        info = g_app_info_get_default_for_type(mimeType.toUtf8().constData(), false);
    } else {
        // The error detail of a missing key is not needed; passing nullptr avoids leaking it.
        gchar *desktopApp = g_key_file_get_string(keyfile, "Default Applications", mimeType.toUtf8().constData(), nullptr);
        if (desktopApp != nullptr) {
            info = (GAppInfo*)g_desktop_app_info_new(desktopApp);
            g_free(desktopApp);
        } else {
            info = g_app_info_get_default_for_type(mimeType.toUtf8().constData(), false);
        }
    }
    g_key_file_free(keyfile);

    if (!G_IS_APP_INFO(info)) {
        // Nothing usable was resolved; only unref when there is an object to release.
        if (info != nullptr) {
            g_object_unref(info);
        }
        return -1;
    }

    bool isSuccess = false;
    {
        QDBusInterface appLaunchInterface("com.kylin.AppManager",
                                          "/com/kylin/AppManager",
                                          "com.kylin.AppManager",
                                          QDBusConnection::sessionBus());
        if (!appLaunchInterface.isValid()) {
            qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
        } else {
            appLaunchInterface.setTimeout(10000);
            QDBusReply<bool> reply = appLaunchInterface.call("LaunchDefaultAppWithUrl", QUrl::fromLocalFile(path).toString());
            if (reply.isValid()) {
                isSuccess = reply.value();
            } else {
                qWarning() << "SoftWareCenter dbus called failed!";
            }
        }
    }
    if (!isSuccess) {
        // The launcher service is unavailable or refused: let Qt open the file instead.
        QDesktopServices::openUrl(QUrl::fromLocalFile(path));
    }
    g_object_unref(info);
    return 0;
}
/**
 * @brief Puts path on the application clipboard as text.
 * @return always true
 */
bool FileUtils::copyPath(QString &path)
{
    QClipboard *board = QApplication::clipboard();
    board->setText(path);
    return true;
}
/**
 * @brief Escapes the characters that would otherwise be interpreted as HTML markup.
 * @param str plain text
 * @return a copy of str with "&", "<" and ">" replaced by their character entities
 */
QString FileUtils::escapeHtml(const QString &str)
{
    QString temp = str;
    // "&" has to be handled first so the ampersands of the entities inserted below are not
    // escaped again; without it, text such as "&lt;" would be rendered as "<".
    temp.replace("&", "&amp;");
    temp.replace("<", "&lt;");
    temp.replace(">", "&gt;");
    return temp;
}
/**
 * @brief Cuts a snippet out of myStr and line-wraps it with wrapData().
 *
 * start and length are used as std::string offsets/counts (i.e. on the encoded bytes of
 * myStr), and the pieces are converted with QString::fromStdString() afterwards. Because a cut
 * may land inside a character, the first or last character of the converted piece is dropped
 * in several branches (the original author notes it "may be garbled").
 *
 * @param myStr   source text
 * @param start   offset at which the snippet should start
 * @param length  number of std::string elements to take
 * @param keyword keyword forwarded to wrapData()/horizontalAdvanceContainsKeyword()
 * @return the wrapped snippet
 */
QString FileUtils::chineseSubString(const std::string &myStr, uint start, uint length, const QString &keyword)
{
std::string afterSub = "";
QString sub = QString::fromStdString(myStr);
// Case 1: the requested length covers the whole string. The whole string is used unless the
// part from start onwards is already at least two label widths wide.
// NOTE(review): substr() is called with start unchecked here; presumably callers guarantee
// start <= myStr.length() - confirm.
if (length >= myStr.length()) {
afterSub = myStr.substr(start,length); // take the substring
if (horizontalAdvanceContainsKeyword(QString::fromStdString(afterSub), keyword) >= 2*LABEL_MAX_WIDTH) {
sub = QString::fromStdString(afterSub);
}
return wrapData(sub, keyword);
}
// Case 2: there is enough text after start to take length elements.
if (start + length <= myStr.length()) {
afterSub = myStr.substr(start,length); // take the substring
sub = QString::fromStdString(afterSub); // convert to QString
if(start + length < myStr.length()){
sub.replace(sub.length() - 1, 1, ""); // the last character may be garbled, drop it
}
sub = wrapData(sub, keyword);
} else {
// Case 3: not enough text between start and the end of the string, so the window is moved
// back to cover the last length elements of the string.
uint newStart = myStr.length() - length; // new start so that exactly length elements remain up to the end
afterSub = myStr.substr(newStart, length);
sub = QString::fromStdString(afterSub);
// NOTE(review): the second argument of substr() is a count, yet start (an offset) is passed;
// possibly start - newStart was intended - confirm.
if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(newStart, start)), keyword) >= 2*LABEL_MAX_WIDTH) {
sub = wrapData(sub.replace(0, 1, ""), keyword, true);
} else {
if (newStart + 3 < start) {
sub.replace(0, 1, "")/*.append("…")*/; // the first character may be garbled, drop it
} else {
afterSub = myStr.substr(start, length); // moving back by three or fewer would only add garbled data, so cut from start instead
sub = "" + QString::fromStdString(afterSub);
// sub.append("…");
}
sub = wrapData(sub, keyword);
}
}
return sub;
}
/**
 * @brief Calls QIcon::fromTheme() while holding iconMutex.
 * @param name icon name
 * @param iconDefault fallback handed to QIcon::fromTheme()
 */
QIcon FileUtils::iconFromTheme(const QString &name, const QIcon &iconDefault)
{
    QMutexLocker guard(&iconMutex);
    QIcon themed = QIcon::fromTheme(name, iconDefault);
    return themed;
}
bool FileUtils::isOpenXMLFileEncrypted(const QString &path)
{
QFile file(path);
file.open(QIODevice::ReadOnly|QIODevice::Text);
QByteArray encrypt = file.read(4);
file.close();
if (encrypt.length() < 4) {
qDebug() << "Reading file error!" << path;
return true;
}
//比较前四位是否为对应值来判断OpenXML类型文件是否加密
if ((encrypt[0] & 0x50) && (encrypt[1] & 0x4b) && (encrypt[2] & 0x03) && (encrypt[3] & 0x04)) {
return false;
} else {
qDebug() << "Encrypt!" << path;
return true;
}
}
//todo: only support docx, pptx, xlsx
bool FileUtils::isEncrypedOrUnsupport(const QString& path, const QString& suffix)
{
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
if(name == "application/zip") {
if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") {
return FileUtils::isOpenXMLFileEncrypted(path);
} else if (suffix == "uot" || suffix == "uos" || suffix == "uop") {
return false;
} else if (suffix == "ofd") {
return false;
} else {
return true;
}
} else if(name == "text/plain") {
if(suffix.endsWith("txt"))
return false;
return true;
} else if(name == "text/html") {
if(suffix.endsWith("html"))
return false;
return true;
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
if(suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") {
return false;
}
return true;
} else if(name == "application/pdf") {
if(suffix == "pdf")
return false;
return true;
} else if(name == "application/xml" || name == "application/uof") {
if(suffix == "uof") {
return false;
}
return true;
} else if (true == targetPhotographTypeMap[suffix]) {
return !isOcrSupportSize(path);
} else {
// qInfo() << "Unsupport format:[" << path << "][" << type.name() << "]";
return true;
}
}
/**
 * @brief Checks whether an image meets the minimum pixel size for OCR.
 * @param path path of the image
 * @return false if the loaded image's height or width is below OCR_MIN_SIZE, true otherwise
 */
bool FileUtils::isOcrSupportSize(QString path)
{
    const QImage image(path);
    const bool tooSmall = image.height() < OCR_MIN_SIZE || image.width() < OCR_MIN_SIZE;
    return !tooSmall;
}
/**
 * @brief Builds rich text in which the characters of text that occur in keyword are emphasised.
 *
 * Every character of text that is contained in keyword (compared case-insensitively) is
 * placed inside a <span>; a style rule renders spans bold and 2pt larger than the application
 * font. Characters are HTML-escaped and newlines are turned into "<br />".
 *
 * @param text text to display
 * @param keyword characters to emphasise
 * @return the markup, wrapped in <pre>
 */
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{
    QString htmlString = QString("<style>"
                                 "span {"
                                 "font-size:%0pt;"
                                 "font-weight:bold;"
                                 "}"
                                 "</style>").arg(qApp->font().pointSizeF() + 2);
    // The keyword does not change inside the loop; upper-case it once.
    const QString upperKeyword = keyword.toUpper();
    bool boldOpenned = false;
    for (int i = 0; i < text.length(); i++) {
        const QString current(text.at(i));
        const bool emphasise = upperKeyword.contains(current.toUpper());
        if (emphasise != boldOpenned) {
            htmlString.append(emphasise ? QString("<span>") : QString("</span>"));
            boldOpenned = emphasise;
        }
        htmlString.append(FileUtils::escapeHtml(current));
    }
    // Close the span when the text ends on an emphasised character.
    if (boldOpenned) {
        htmlString.append(QString("</span>"));
    }
    htmlString.replace("\n", "<br />"); // replace line breaks
    return "<pre>" + htmlString + "</pre>";
}
/**
 * @brief Wraps the HTML-escaped name in an <h3><pre> block.
 *        Note that the inline style sets font-weight to normal.
 */
QString FileUtils::setAllTextBold(const QString &name)
{
    const QString markup("<h3 style=\"font-weight:normal;\"><pre>%1</pre></h3>");
    const QString escaped = escapeHtml(name);
    return markup.arg(escaped);
}
/**
 * @brief Inserts up to two line breaks into text so that it fits a label of LABEL_MAX_WIDTH.
 *
 * Widths are measured with the font metrics of p_label. The label itself is not modified.
 *
 * @param p_label label whose font metrics are used
 * @param text text to wrap
 * @return text with at most two '\n' inserted; unchanged if it already fits
 */
QString FileUtils::wrapData(QLabel *p_label, const QString &text)
{
    QString wrapped = text;
    const QFontMetrics metrics = p_label->fontMetrics();
    if (!(metrics.horizontalAdvance(wrapped) > LABEL_MAX_WIDTH)) {
        return wrapped;
    }

    int lineStart = 0;      // index at which the current line begins
    int breaks = 0;         // number of line breaks inserted so far
    for (int i = lineStart; i < wrapped.length(); i++) {
        const int lineWidth = metrics.horizontalAdvance(wrapped.mid(lineStart, i - lineStart));
        int breakPos;
        if (lineWidth == LABEL_MAX_WIDTH) {
            breakPos = i;           // exactly full: break right here
        } else if (lineWidth > LABEL_MAX_WIDTH) {
            breakPos = i - 1;       // overfull: break one position earlier
        } else {
            continue;
        }
        lineStart = i;
        wrapped.insert(breakPos, '\n');
        breaks++;
        if (breaks == 2) {
            break;
        }
    }
    return wrapped;
}
/**
 * @brief Line-wraps text for display with emphasised keyword characters.
 *
 * Characters of text that are contained in keyword (compared case-insensitively) are measured
 * with a bold font that is 2pt larger than the application font, all other characters with
 * the application font. The text is walked run by run (consecutive characters of the same
 * kind); whenever the accumulated width reaches LABEL_MAX_WIDTH a '\n' is inserted. After the
 * second line break the remaining text is cut off and a string literal is added at the cut.
 *
 * NOTE(review): the literals passed to horizontalAdvance(), prepend() and append() at the cut
 * are empty in this copy of the file, although the author's comments talk about an ellipsis -
 * confirm that no character was lost.
 *
 * @param text      text to wrap
 * @param keyword   characters that are rendered emphasised
 * @param elideLeft true to walk the text from its end towards its start and cut at the front,
 *                  false to walk from the start and cut at the end
 * @return the wrapped (and possibly shortened) text
 */
QString FileUtils::wrapData(const QString &text, const QString &keyword, bool elideLeft)
{
QString wrapText = text;
// Metrics for emphasised characters: application font family, bold, 2pt larger.
QFont boldFont(qApp->font().family());
boldFont.setPointSizeF(qApp->font().pointSizeF() + 2);
boldFont.setWeight(QFont::Bold);
QFontMetricsF boldMetricsF(boldFont);
// Metrics for all other characters: application font family at the application point size.
QFont font(qApp->font().family());
font.setPointSizeF(qApp->font().pointSizeF());
QFontMetricsF fontMetricsF(font);
qreal blockLength = 0; // width of the run currently being extended
qreal total = 0; // width of the completed runs on the current line
int lineCount = 0; // number of line breaks inserted so far
int normalLength = 0; // length of the current run of non-keyword characters
int boldLength = 0; // length of the current run of keyword characters
if (elideLeft) {
// Walk backwards from the last character.
for (int i = text.length() - 1; i >= 0; i--) {
if (keyword.toUpper().contains(text.at(i).toUpper())) {
// A keyword character ends a preceding normal run: add that run's width to the line.
if (normalLength) {
total += fontMetricsF.horizontalAdvance(text.mid(i + 1, normalLength));
normalLength = 0;
blockLength = 0;
}
if (boldLength) {
blockLength = boldMetricsF.horizontalAdvance(text.mid(i + 1, boldLength));
}
boldLength++;
} else {
// A normal character ends a preceding bold run: add that run's width to the line.
if (boldLength) {
total += boldMetricsF.horizontalAdvance(text.mid(i + 1, boldLength));
boldLength = 0;
blockLength = 0;
}
if (normalLength) {
blockLength = fontMetricsF.horizontalAdvance(text.mid(i + 1, normalLength));
}
normalLength++;
}
// First character of the text reached: measure the run that is still open.
if (!i) {
if (normalLength) {
blockLength = fontMetricsF.horizontalAdvance(text.left(normalLength));
}
if (boldLength) {
blockLength = boldMetricsF.horizontalAdvance(text.left(boldLength));
}
}
// The line is full: insert a break and start a new line.
if (total + blockLength >= LABEL_MAX_WIDTH) {
i++;
if (total + blockLength > LABEL_MAX_WIDTH) {
if (normalLength) {
normalLength = 1;
} else {
boldLength = 1;
}
} else {
normalLength = 0;
boldLength = 0;
}
wrapText.insert(i + 1, '\n');
lineCount++;
total = 0;
blockLength = 0;
}
// Two breaks inserted: drop the text in front of the cut.
if (lineCount == 2) {
QString leftWord = text.left(i + 1);
if (!leftWord.isEmpty()) {
qreal distance = 2;// 2 = the line break plus the first character that is to be replaced
qreal wordSize = 0;
for (int index = i + 1; index < text.length(); index++) {
wordSize += keyword.toUpper().contains(text.at(index).toUpper()) ?
boldMetricsF.horizontalAdvance(text.at(index)) : fontMetricsF.horizontalAdvance(text.at(index));
if (wordSize < fontMetricsF.horizontalAdvance("")) {
distance++;// the character is narrower than the ellipsis and could be covered by it, so cut one more character
} else {
break;
}
}
wrapText = wrapText.right(wrapText.size() - leftWord.size() - distance);
wrapText.prepend("");
}
break;
}
}
} else {
// Walk forwards; the extra iteration at i == text.length() measures the run that is still open.
for (int i = 0; i <= text.length(); i++) {
if (i == text.length()) {
if (normalLength) {
blockLength = fontMetricsF.horizontalAdvance(text.right(normalLength));
}
if (boldLength) {
blockLength = boldMetricsF.horizontalAdvance(text.right(boldLength));
}
} else {
if (keyword.toUpper().contains(text.at(i).toUpper())) {
// A keyword character ends a preceding normal run: add that run's width to the line.
if (normalLength) {
total += fontMetricsF.horizontalAdvance(text.mid(i - normalLength, normalLength));
normalLength = 0;
blockLength = 0;
}
if (boldLength) {
blockLength = boldMetricsF.horizontalAdvance(text.mid(i - boldLength, boldLength));
}
boldLength++;
} else {
// A normal character ends a preceding bold run: add that run's width to the line.
if (boldLength) {
total += boldMetricsF.horizontalAdvance(text.mid(i - boldLength, boldLength));
boldLength = 0;
blockLength = 0;
}
if (normalLength) {
blockLength = fontMetricsF.horizontalAdvance(text.mid(i - normalLength, normalLength));
}
normalLength++;
}
}
// The line is full: insert a break. lineCount compensates for breaks already inserted into
// wrapText, whose indices are shifted relative to text.
if (total + blockLength >= LABEL_MAX_WIDTH) {
i--;
if (total + blockLength > LABEL_MAX_WIDTH) {
wrapText.insert(i + lineCount, '\n');
if (normalLength) {
normalLength = 1;
} else {
boldLength = 1;
}
} else {
wrapText.insert(i + 1 + lineCount, '\n');
normalLength = 0;
boldLength = 0;
}
lineCount++;
total = 0;
blockLength = 0;
}
// Two breaks inserted: drop the text behind the cut.
if (lineCount == 2) {
QString leftWord = text.mid(i);
if (!leftWord.isEmpty()) {
qreal distance = 2;
qreal wordSize = 0;
for (int index = i; index > 0; index--) {
wordSize += keyword.toUpper().contains(text.at(index).toUpper()) ?
boldMetricsF.horizontalAdvance(text.at(index)) : fontMetricsF.horizontalAdvance(text.at(index));
if (wordSize < fontMetricsF.horizontalAdvance("")) {
distance++;
} else {
break;
}
}
wrapText = wrapText.left(wrapText.size() - leftWord.size() - distance);
wrapText.append("");
}
break;
}
}
}
return wrapText;
}
/**
 * @brief Measures the display width of content when its keyword characters are emphasised.
 *
 * Characters of content that are contained in keyword (compared case-insensitively) are
 * measured with a bold font that is 2pt larger than the application font; all other characters
 * are measured with the application font. The widths of the consecutive runs are summed.
 *
 * @param content text to measure
 * @param keyword characters that are rendered emphasised
 * @return total horizontal advance of content
 */
qreal FileUtils::horizontalAdvanceContainsKeyword(const QString &content, const QString &keyword)
{
    QFont boldFont(qApp->font().family());
    boldFont.setPointSizeF(qApp->font().pointSizeF() + 2);
    boldFont.setWeight(QFont::Bold);
    const QFontMetricsF boldMetricsF(boldFont);

    QFont font(qApp->font().family());
    font.setPointSizeF(qApp->font().pointSizeF());
    const QFontMetricsF fontMetricsF(font);

    // The keyword does not change inside the loop; upper-case it once.
    const QString upperKeyword = keyword.toUpper();

    qreal contentSize = 0;
    int boldLength = 0;     // length of the current run of keyword characters
    int normalLength = 0;   // length of the current run of other characters
    for (int i = 0; i < content.length(); i++) {
        if (upperKeyword.contains(content.at(i).toUpper())) {
            if (normalLength) {
                // A normal run ends here: it must be measured with the normal metrics, as is
                // done for the trailing run below, not with the bold ones.
                contentSize += fontMetricsF.horizontalAdvance(content.mid(i - normalLength, normalLength));
                normalLength = 0;
            }
            boldLength++;
        } else {
            if (boldLength) {
                contentSize += boldMetricsF.horizontalAdvance(content.mid(i - boldLength, boldLength));
                boldLength = 0;
            }
            normalLength++;
        }
    }
    // Add the run that is still open at the end of the text (at most one of the two is non-zero).
    if (boldLength) {
        contentSize += boldMetricsF.horizontalAdvance(content.right(boldLength));
    }
    if (normalLength) {
        contentSize += fontMetricsF.horizontalAdvance(content.right(normalLength));
    }
    return contentSize;
}
/**
* uof1.0解析
* 参考规范GB/T 20916-2007
* 1.文字处理
* 2.电子表格
* 3.演示文稿
* ppt的内容存放在对象集中
* 可以通过演示文稿-主体-幻灯片集-幻灯片下的锚点属性获取引用了哪些内容:
* <uof:锚点 uof:图形引用="OBJ16"/>
* 目标:文本串
*/
void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
{
QFileInfo info(path);
if (!info.exists() || info.isDir()) {
return;
}
QFile file(path);
if (!file.open(QIODevice::ReadOnly)) {
return;
}
QDomDocument doc;
if (!doc.setContent(&file)) {
file.close();
return;
}
file.close();
bool isPPT = false;
QDomElement rootElem = doc.documentElement();
QDomNode node = rootElem.firstChild();
while (!node.isNull()) {
QDomElement e = node.toElement();
if (!e.isNull() && e.tagName() == "uof:演示文稿") {
isPPT = true;
break;
}
node = node.nextSibling();
}
//单独处理pdf文档
if (isPPT) {
qDebug() << path << "is PPT";
processUOFPPT(doc, textContent);
return;
}
file.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&file);
while (!reader.atEnd()) {
//适用于文字处理与电子表格
if (reader.readNextStartElement() && reader.name().toString() == "文本串") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
break;
}
}
}
file.close();
}
/**
* uof2.0解析
* @brief 参考规范文档 https://www.doc88.com/p-9089133923912.html 或 GJB/Z 165-2012
* ppt文档的内容存放在graphics.xml中需要先解析content中的引用再解析graphics内容
* @param path
* @param textContent
*/
void FileUtils::getUOF2TextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip file(path);
if (!file.open(QuaZip::mdUnzip))
return;
if (!file.setCurrentFile("content.xml")) {
return;
}
QuaZipFile fileR(&file);
if (!fileR.open(QIODevice::ReadOnly)) {
return;
}
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()) {
if (reader.readNextStartElement() && reader.name().toString() == "文本串_415B") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
break;
}
}
}
fileR.close();
file.close();
}
void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip zipFile(path);
QDomDocument doc;
if (!loadZipFileToDoc(zipFile, doc, "content.xml")) {
return;
}
QDomElement rootElem = doc.documentElement();
QList<QDomElement> nodes;
QQueue<QString> names; //每个节点的名称
names << "演:幻灯片集_6C0E" << "演:幻灯片_6C0F";
findNodes(rootElem, names, nodes);
if (nodes.empty()) {
return;
}
QStringList attrs;
for (const auto &node : nodes) {
names.clear();
names << "uof:锚点_C644";
findNodeAttr(node, names, "图形引用_C62E", attrs);
}
if (attrs.empty()) {
return;
}
if (!loadZipFileToDoc(zipFile, doc, "graphics.xml")) {
return;
}
nodes.clear();
names.clear();
names << "图:图形_8062";
rootElem = doc.documentElement();
findNodesByAttr(rootElem, names, nodes, "标识符_804B", attrs);
QList<QDomElement> nodes416B; //字:段落_416B
for (const auto &node : nodes) {
names.clear();
names << "图:文本_803C" << "图:内容_8043" << "字:段落_416B";
findNodes(node, names, nodes416B);
}
nodes.clear();
for (const auto &node : nodes416B) {
names.clear();
names << "字:句_419D";
findNodes(node, names, nodes); //所有的 字:句_419D
}
for (const auto &node : nodes) {
names.clear();
names << "字:文本串_415B";
if (findNodeText(node, names, textContent)) {
break;
}
}
}
/**
* OFD文件解析
* @brief 参考: GB/T 33190-2016
* @param path
* @param textContent
*/
void FileUtils::getOFDTextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip zipfile(path);
if (!zipfile.open(QuaZip::mdUnzip))
return;
// GB/T 33190-2016规范定义可以存在多个Doc_x目录暂时只取第一个目录的内容
QString prefix("Doc_0/Pages/");
QStringList fileList;
for (const auto &file: zipfile.getFileNameList()) {
if (file.startsWith(prefix)) {
fileList << file;
}
}
for (int i = 0; i < fileList.count(); ++i) {
QString filename = prefix + "Page_" + QString::number(i) + "/Content.xml";
if (!zipfile.setCurrentFile(filename)) {
continue;
}
QuaZipFile fileR(&zipfile);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()) {
if (reader.readNextStartElement() && reader.name().toString() == "TextCode") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
fileR.close();
zipfile.close();
return;
}
}
}
fileR.close();
}
zipfile.close();
}