ukui-search/libsearch/file-utils.cpp

1436 lines
45 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
* Modified by: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangjiaping <zhangjiaping@kylinos.cn>
*
*/
#include "file-utils.h"
#include <QXmlStreamReader>
#include <QMutexLocker>
#include <gio/gdesktopappinfo.h>
#include <QDBusMessage>
#include <QDBusConnection>
#include <QDomDocument>
#include <QDBusInterface>
#include <QDBusReply>
#include <QDesktopServices>
#include <QMimeDatabase>
#include <QCryptographicHash>
#include <QFileInfo>
#include <QFile>
#include <QApplication>
#include <QDir>
#include <QDebug>
#include <QUrl>
#include <QDomElement>
#include <QClipboard>
#include <QQueue>
#include <QFontMetrics>
#include <QTextBoundaryFinder>
#include <quazip5/quazipfile.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <quazip5/quazip.h>
#include <uchardet/uchardet.h>
#include <poppler/qt5/poppler-qt5.h>
#include "gobject-template.h"
#include "hanzi-to-pinyin.h"
#include "common.h"
#include "icon-loader.h"
using namespace UkuiSearch;
#define MAX_CONTENT_LENGTH 20480000
/**
* @brief 查找elem的子节点
* @param elem 起始节点
* @param names 名称链
* @param nodes 查找到的全部结果
*/
void findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
{
QString targetName = names.dequeue();
QDomNode node = elem.firstChild();
while (!node.isNull()) {
QDomElement e = node.toElement();
if (!e.isNull() && e.tagName() == targetName) {
if (names.empty()) {
nodes.append(e);
} else {
findNodes(e, names, nodes);
break;
}
}
node = node.nextSibling();
}
}
void findNodesByAttr(const QDomElement &elem, QQueue <QString> &names, QList <QDomElement> &nodes, const QString &attr, const QStringList &values)
{
findNodes(elem, names, nodes);
QList<QDomElement>::iterator it = nodes.begin();
while (it != nodes.end()) {
if ((*it).hasAttribute(attr) && values.contains((*it).attribute(attr))) {
it++;
} else {
it = nodes.erase(it);
}
}
}
bool findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
{
QList<QDomElement> nodes;
findNodes(elem, names, nodes);
for (const auto &node : nodes) {
content.append(node.text());
if (content.length() >= MAX_CONTENT_LENGTH / 3) {
return true;
}
}
return false;
}
void findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
{
QList<QDomElement> nodes;
findNodes(elem, names, nodes);
for (const auto &node : nodes) {
if (node.hasAttribute(attr)) {
attrs.append(node.attribute(attr));
}
}
}
void processUOFPPT(const QDomDocument &doc, QString &content)
{
QDomElement rootElem = doc.documentElement();
QList<QDomElement> nodes;
QQueue<QString> names; //每个节点的名称
names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
findNodes(rootElem, names, nodes);
if (nodes.empty()) {
//TODO 在uof-ppt不存在锚点节点时直接查找文本节点
return;
}
QStringList objs;
//每一个 演:幻灯片 -> 锚点
for (const auto &node : nodes) {
names.clear();
names << "uof:锚点";
findNodeAttr(node, names, "uof:图形引用", objs);
}
nodes.clear();
names.clear();
names << "uof:对象集" << "图:图形";
findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
if (nodes.empty()) {
return;
}
QList<QDomElement> paraNodes; //全部段落节点
for (const auto &node : nodes) {
names.clear();
names << "图:文本内容" << "字:段落";
findNodes(node, names, paraNodes);
}
nodes.clear();
for (const auto &node : paraNodes) {
names.clear();
names << "字:句";
findNodes(node, names, nodes); //全部段落下的全部句节点
}
for (const auto &node : nodes) {
names.clear();
names << "字:文本串";
if (findNodeText(node, names, content)) {
break;
}
}
}
/**
 * @brief Parse one entry of a zip archive into a DOM document.
 *
 * The archive is opened in unzip mode if it is not open yet; it is left open on return.
 *
 * @param zipFile  archive to read from
 * @param doc      cleared and then filled with the parsed entry
 * @param fileName name of the entry inside the archive
 * @return true when the entry was found, opened and parsed; false otherwise
 */
bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    const bool archiveReady = zipFile.isOpen() || zipFile.open(QuaZip::mdUnzip);
    if (!archiveReady || !zipFile.setCurrentFile(fileName)) {
        return false;
    }
    QuaZipFile entry(&zipFile);
    if (!entry.open(QIODevice::ReadOnly)) {
        return false;
    }
    doc.clear();
    const bool parsed = doc.setContent(&entry);
    entry.close();
    return parsed;
}
// Default constructor: performs no work.
FileUtils::FileUtils() = default;
/**
 * @brief Build a term for a path: the hex-encoded MD5 digest of its UTF-8 bytes.
 * @param path path to hash
 * @return the hexadecimal digest as a std::string
 */
std::string FileUtils::makeDocUterm(QString path) {
    const QByteArray utf8Path = path.toUtf8();
    const QByteArray digest = QCryptographicHash::hash(utf8Path, QCryptographicHash::Md5);
    return digest.toHex().toStdString();
}
/**
 * @brief FileUtils::getFileIcon Get the icon of a file.
 *
 * Queries the standard icon attribute through GIO and returns the first of its themed
 * icon names that IconLoader can load. Falls back to the "unknown" icon when the query
 * fails, when the icon is not a themed icon, or when none of its names can be loaded.
 *
 * @param uri        file URI, e.g. "file:///home/xxx/xxx/xxxx.txt"
 * @param checkValid unused
 * @return the icon for the file, or the "unknown" fallback icon
 */
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid) {
    Q_UNUSED(checkValid)
    const auto unknownIcon = []() {
        return IconLoader::loadIconQt("unknown", QIcon(":/res/icons/unknown.svg"));
    };

    auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
    auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                G_FILE_ATTRIBUTE_STANDARD_ICON,
                                                G_FILE_QUERY_INFO_NONE,
                                                nullptr,
                                                nullptr));
    if (!G_IS_FILE_INFO(info.get()->get())) {
        return unknownIcon();
    }

    // The GIcon is owned by the file info: do not unref it.
    GIcon *g_icon = g_file_info_get_icon(info.get()->get());
    // Only themed icons carry a name list; any other GIcon implementation must not be
    // cast with G_THEMED_ICON().
    if (!G_IS_THEMED_ICON(g_icon)) {
        return unknownIcon();
    }

    const gchar * const *icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
    if (icon_names) {
        for (const gchar * const *p = icon_names; *p; ++p) {
            const QIcon icon = IconLoader::loadIconQt(*p);
            if (!icon.isNull()) {
                return icon;
            }
        }
    }
    return unknownIcon();
}
/**
 * @brief Icon used for settings entries: the control-center application icon.
 * @return the "ukui-control-center" icon, with the bundled SVG passed as fallback
 */
QIcon FileUtils::getSettingIcon() {
    const QIcon bundledIcon(":/res/icons/ukui-control-center.svg");
    return IconLoader::loadIconQt("ukui-control-center", bundledIcon);
}
/**
 * @brief FileUtils::getFileName Get the file name of an existing file.
 *
 * NOTE(review): the original documentation describes @p uri as
 * "file:///home/xxx/xxx/xxxx.txt", but the value is handed straight to QFileInfo —
 * confirm whether callers pass a URI or a plain path.
 * An earlier QUrl-based variant of this function was disabled.
 *
 * @param uri location of the file
 * @return the file name when QFileInfo reports that the file exists, otherwise "Unknown File"
 */
QString FileUtils::getFileName(const QString &uri) {
    const QFileInfo fileInfo(uri);
    if (!fileInfo.exists()) {
        return "Unknown File";
    }
    return fileInfo.fileName();
}
/**
 * @brief FileUtils::getAppName Get the application name from a desktop entry.
 * @param path full path of the .desktop file
 * @return the localized "Name" value of the desktop group; "Unknown App" when the file
 *         cannot be loaded; a null string when the key cannot be read
 */
QString FileUtils::getAppName(const QString &path) {
    const QByteArray utf8Path = path.toUtf8();
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, utf8Path.constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return "Unknown App";
    }
    // g_key_file_get_locale_string() returns a newly allocated string: copy it into a
    // QString and release it, otherwise every call leaks the name.
    gchar *rawName = g_key_file_get_locale_string(keyfile,
                                                  G_KEY_FILE_DESKTOP_GROUP,
                                                  G_KEY_FILE_DESKTOP_KEY_NAME,
                                                  nullptr,
                                                  nullptr);
    const QString name = QString::fromUtf8(rawName);
    g_free(rawName);
    g_key_file_free(keyfile);
    return name;
}
/**
 * @brief FileUtils::getSettingName Get the name of a settings entry.
 * @param setting settings identifier, in the form About/About->Properties
 * @return everything after the last "/" (the whole string when it contains no "/")
 */
QString FileUtils::getSettingName(const QString &setting) {
    const int lastSlash = setting.lastIndexOf("/");
    return setting.mid(lastSlash + 1);
}
/**
 * @brief Test whether @p pathA equals @p pathB or lies below it.
 *
 * The comparison is purely textual: @p pathA must be identical to @p pathB or start
 * with @p pathB followed by "/". Every path counts as being under "/".
 *
 * @param pathA path to test
 * @param pathB candidate ancestor path
 * @return true when @p pathA is @p pathB or is under it
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    if (pathB == "/") {
        return true;
    }
    if (pathA.length() < pathB.length()) {
        return false;
    }
    return pathA == pathB || pathA.startsWith(pathB + "/");
}
/**
 * @brief Determine the MIME type of a file using QMimeDatabase::MatchContent.
 * @param path path of the file
 * @return the MIME type reported by QMimeDatabase
 */
QMimeType FileUtils::getMimetype(const QString &path) {
    return QMimeDatabase().mimeTypeForFile(path, QMimeDatabase::MatchContent);
}
/**
 * @brief Convert text through HanZiToPinYin and build two search strings from the result.
 * @param hanzi text to convert
 * @return a two-element list: all conversion results joined together, followed by the
 *         first character of every non-empty result
 */
QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
    QStringList pieces;
    HanZiToPinYin::getInstance()->getResults(hanzi.toStdString(), pieces);

    QString initials;
    for (int i = 0; i < pieces.size(); ++i) {
        const QString &piece = pieces.at(i);
        if (piece.isEmpty()) {
            continue;
        }
        initials += piece.at(0);
    }

    QStringList output;
    output.append(pieces.join(""));
    output.append(initials);
    return output;
}
/**
* @brief FileUtils::getDocxTextContent
* @param path: abs path
* @return docx to QString
*/
void FileUtils::getDocxTextContent(const QString &path, QString &textcontent) {
//fix me :optimized by xpath??
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
file.close();
return;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/* //原加载DOM文档方式
QDomDocument doc;
doc.setContent(fileR.readAll());
fileR.close();
QDomElement first = doc.firstChildElement("w:document");
QDomElement body = first.firstChildElement("w:body");
while(!body.isNull()) {
QDomElement wp = body.firstChildElement("w:p");
while(!wp.isNull()) {
QDomElement wr = wp.firstChildElement("w:r");
while(!wr.isNull()) {
QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n", "")).replace("\r", " ");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
wr = wr.nextSiblingElement();
}
wp = wp.nextSiblingElement();
}
body = body.nextSiblingElement();
}
file.close();
return;
*/
}
void FileUtils::getPptxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
QStringList fileList;
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix))
fileList << i;
}
if(fileList.isEmpty()) {
file.close();
return;
}
for(int i = 0; i < fileList.size(); ++i){
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
}
file.close();
return;
/*
QDomElement sptree;
QDomElement sp;
QDomElement txbody;
QDomElement ap;
QDomElement ar;
QDomDocument doc;
QDomElement at;
// QDomNodeList atList;
for(int i = 0; i < fileList.size(); ++i) {
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
doc.clear();
doc.setContent(fileR.readAll());
fileR.close();
//fix me :optimized by xpath??
//This method looks better but slower,
//If xml file is very large with many useless node,this method will take a lot of time.
// atList = doc.elementsByTagName("a:t");
// for(int i = 0; i<atList.size(); ++i)
// {
// at = atList.at(i).toElement();
// if(!at.isNull())
// {
// textcontent.append(at.text().replace("\r","")).replace("\t"," ");
// if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
// {
// file.close();
// return;
// }
// }
// }
//This is ugly but seems more efficient when handel a large file.
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
while(!sptree.isNull()) {
sp = sptree.firstChildElement("p:sp");
while(!sp.isNull()) {
txbody = sp.firstChildElement("p:txBody");
while(!txbody.isNull()) {
ap = txbody.firstChildElement("a:p");
while(!ap.isNull()) {
ar = ap.firstChildElement("a:r");
while(!ar.isNull()) {
at = ar.firstChildElement("a:t");
textcontent.append(at.text().replace("\r", "")).replace("\t", "");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
ar = ar.nextSiblingElement();
}
ap = ap.nextSiblingElement();
}
txbody = txbody.nextSiblingElement();
}
sp = sp.nextSiblingElement();
}
sptree = sptree.nextSiblingElement();
}
}
file.close();
return;
*/
}
void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
file.close();
return;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/*
QDomDocument doc;
doc.setContent(fileR.readAll());
fileR.close();
QDomElement sst = doc.firstChildElement("sst");
QDomElement si;
QDomElement r;
QDomElement t;
while(!sst.isNull()) {
si = sst.firstChildElement("si");
while(!si.isNull()) {
r = si.firstChildElement("r");
if(r.isNull()) {
t = si.firstChildElement("t");
} else {
t = r.firstChildElement("t");
}
if(t.isNull())
continue;
textcontent.append(t.text().replace("\r", "").replace("\n", ""));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
si = si.nextSiblingElement();
}
sst = sst.nextSiblingElement();
}
file.close();
return;
*/
}
void FileUtils::getPdfTextContent(const QString &path, QString &textcontent) {
Poppler::Document *doc = Poppler::Document::load(path);
if(doc->isLocked()) {
delete doc;
return;
}
const QRectF qf;
int pageNum = doc->numPages();
for(int i = 0; i < pageNum; ++i) {
Poppler::Page *page = doc->page(i);
if(page) {
textcontent.append(page->text(qf).replace("\n", "").replace("\r", " "));
delete page;
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
break;
}
}
delete doc;
return;
}
void FileUtils::getTxtContent(const QString &path, QString &textcontent) {
QFile file(path);
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
return;
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0)
qWarning() << "Txt file encoding format detect fail!" << path;
uchardet_data_end(chardet);
const char *codec = uchardet_get_charset(chardet);
if(QTextCodec::codecForName(codec) == nullptr) {
qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(codec);
return;
}
QTextStream stream(encodedString, QIODevice::ReadOnly);
stream.setCodec(codec);
uchardet_delete(chardet);
textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
file.close();
encodedString.clear();
chardet = NULL;
stream.flush();
return;
}
/**
 * @brief Open a file with its default application, or reveal it in the file manager.
 *
 * With @p openInDir set, the org.freedesktop.FileManager1 "ShowItems" method is called
 * on the session bus. Otherwise the default application for the file's content type is
 * looked up (preferring the "Default Applications" group of ~/.config/mimeapps.list,
 * because g_app_info_get_default_for_type() was observed to pick the wrong
 * application), and the file is launched through the com.kylin.AppManager D-Bus
 * service, falling back to QDesktopServices::openUrl() when that call does not succeed.
 *
 * @param path      local path of the file
 * @param openInDir true to show the file in the file manager instead of opening it
 * @return 0 on success; -1 when the ShowItems call did not return a reply, or when no
 *         default application could be determined
 */
int FileUtils::openFile(QString &path, bool openInDir)
{
    if (openInDir) {
        QStringList list;
        list.append(path);
        QDBusMessage message = QDBusMessage::createMethodCall("org.freedesktop.FileManager1",
                                                              "/org/freedesktop/FileManager1",
                                                              "org.freedesktop.FileManager1",
                                                              "ShowItems");
        message.setArguments({list, "ukui-search"});
        const QDBusMessage messageRes = QDBusConnection::sessionBus().call(message);
        // Inspect the type of the received message; comparing the enum constant with
        // itself would always succeed.
        if (messageRes.type() == QDBusMessage::ReplyMessage) {
            return 0;
        }
        qDebug() << "Error! QDBusMessage reply error! ReplyMessage:" << messageRes.type() << messageRes.errorMessage();
        return -1;
    }

    const QUrl fileUrl = QUrl::fromLocalFile(path);
    auto file = wrapGFile(g_file_new_for_uri(fileUrl.toString().toUtf8().constData()));
    auto fileInfo = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                    "standard::*," "time::*," "access::*," "mountable::*," "metadata::*," "trash::*," G_FILE_ATTRIBUTE_ID_FILE,
                                                    G_FILE_QUERY_INFO_NONE,
                                                    nullptr,
                                                    nullptr));
    // The query may fail (e.g. the file vanished); only read attributes from a valid info.
    GFileInfo *rawInfo = fileInfo.get()->get();
    QString mimeType;
    if (G_IS_FILE_INFO(rawInfo)) {
        mimeType = g_file_info_get_content_type(rawInfo);
        if (mimeType.isEmpty() && g_file_info_has_attribute(rawInfo, "standard::fast-content-type")) {
            mimeType = g_file_info_get_attribute_string(rawInfo, "standard::fast-content-type");
        }
    }
    const QByteArray mimeUtf8 = mimeType.toUtf8();

    /*
     * g_app_info_get_default_for_type function get wrong default app, so we get the
     * default app info from mimeapps.list, and chose the right default app for mimeType file
     */
    GError *error = nullptr;
    GAppInfo *info = nullptr;
    const QString mimeAppsListPath = QStandardPaths::writableLocation(QStandardPaths::HomeLocation)
                                     + "/.config/mimeapps.list";
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, mimeAppsListPath.toUtf8().constData(), G_KEY_FILE_NONE, &error)) {
        qWarning() << "load mimeapps list error msg" << (error ? error->message : "");
        g_clear_error(&error);
        info = g_app_info_get_default_for_type(mimeUtf8.constData(), false);
    } else {
        gchar *desktopApp = g_key_file_get_string(keyfile, "Default Applications", mimeUtf8.constData(), &error);
        if (desktopApp != nullptr) {
            info = reinterpret_cast<GAppInfo *>(g_desktop_app_info_new(desktopApp));
            g_free(desktopApp);
        } else {
            // A missing key sets the error; release it before falling back.
            g_clear_error(&error);
            info = g_app_info_get_default_for_type(mimeUtf8.constData(), false);
        }
    }
    g_key_file_free(keyfile);

    if (!G_IS_APP_INFO(info)) {
        // Nothing to unref here: info is null or not a valid app info.
        return -1;
    }

    bool isSuccess = false;
    {
        QDBusInterface appLaunchInterface("com.kylin.AppManager",
                                          "/com/kylin/AppManager",
                                          "com.kylin.AppManager",
                                          QDBusConnection::sessionBus());
        if (!appLaunchInterface.isValid()) {
            qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
        } else {
            appLaunchInterface.setTimeout(10000);
            QDBusReply<bool> reply = appLaunchInterface.call("LaunchDefaultAppWithUrl", fileUrl.toString());
            if (reply.isValid()) {
                isSuccess = reply.value();
            } else {
                qWarning() << "SoftWareCenter dbus called failed!";
            }
        }
    }
    if (!isSuccess) {
        QDesktopServices::openUrl(fileUrl);
    }
    g_object_unref(info);
    return 0;
}
/**
 * @brief Put a path on the application clipboard as text.
 * @param path text to copy
 * @return always true
 */
bool FileUtils::copyPath(QString &path)
{
    QClipboard *clipboard = QApplication::clipboard();
    clipboard->setText(path);
    return true;
}
/**
 * @brief Escape text so it can be embedded in HTML / rich text.
 *
 * Besides "<" and ">", the ampersand has to be escaped as well; otherwise text such as
 * "a&lt;b" or "R&D" is interpreted as markup instead of being shown literally.
 * QString::toHtmlEscaped() replaces "<", ">", "&" and the double quote with their
 * HTML entities.
 *
 * @param str plain text
 * @return the escaped text
 */
QString FileUtils::escapeHtml(const QString &str)
{
    return str.toHtmlEscaped();
}
/**
 * @brief Build a short, line-wrapped snippet of text around a keyword hit.
 *
 * A window of up to 240 bytes is cut out of @p myStr and walked grapheme by grapheme.
 * A grapheme is measured with the bold font (application font size + 2) when @p keyword
 * contains it (case-insensitively), otherwise with the normal application font. A "\n"
 * is inserted when the measured width of the first line reaches LABEL_MAX_WIDTH, and
 * the walk stops when the second line reaches LABEL_MAX_WIDTH.
 *
 * @param myStr   text to cut the snippet from (byte offsets are used on it)
 * @param start   byte offset in @p myStr where the window starts
 * @param keyword keyword whose graphemes are measured as bold
 * @return the snippet text
 */
QString FileUtils::getSnippet(const std::string &myStr, uint start, const QString &keyword)
{
// Bold font used for keyword graphemes: application family, point size + 2.
QFont boldFont(qApp->font().family());
boldFont.setPointSizeF(qApp->font().pointSizeF() + 2);
boldFont.setWeight(QFont::Bold);
QFontMetricsF boldMetricsF(boldFont);
uint strLength = 240;
bool elideLeft(false);
std::string sub = myStr.substr(start, strLength);
QString content = QString::fromStdString(sub);
// Fewer than strLength bytes remain after start: extend the window backwards.
if (start + strLength > myStr.length()) {
// New start position of the window.
int newStart = myStr.length() - strLength;
if (myStr.length() < strLength) {
newStart = 0;
sub = myStr;
} else {
sub = myStr.substr(newStart, strLength);
}
// NOTE(review): the first argument adds the qreal width of the keyword to a QString
// before it is measured, and substr() receives `start` as a length — confirm both are intended.
if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(newStart, start)) + boldMetricsF.horizontalAdvance(keyword), keyword) > 2 * LABEL_MAX_WIDTH) {
// If the text from start to the end fits into two lines, build the snippet from the
// right end (elideLeft); otherwise restart the window at start.
if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(start)), keyword) <= 2 * LABEL_MAX_WIDTH) {
elideLeft = true;
} else {
sub = myStr.substr(start);
}
}
content = QString::fromStdString(sub);
}
// Normal font used for non-keyword graphemes.
QFont font(qApp->font().family());
font.setPointSizeF(qApp->font().pointSizeF());
QFontMetricsF fontMetricsF(font);
qreal blockLength = 0; // width of the current run (bold or normal) on the current line
qreal total = 0; // width of the finished runs on the current line
int lineCount = 0;
int normalLength = 0; // character count of the current normal run
int boldLength = 0; // character count of the current bold run
QString snippet;
int boundaryStart = 0;
int boundaryEnd = 0;
QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content);
if (!elideLeft) {
// Forward pass: append graphemes from the beginning of content.
for (;fm.position() != -1;fm.toNextBoundary()) {
boundaryEnd = fm.position();
QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);
if (boundaryStart == boundaryEnd) {
continue;
}
if (keyword.toUpper().contains(word.toUpper())) {
// Keyword grapheme: close a pending normal run, then extend the bold run.
if (normalLength) {
total += fontMetricsF.horizontalAdvance(content.mid(boundaryStart - normalLength, normalLength));
normalLength = 0;
blockLength = 0;
}
boldLength += (boundaryEnd - boundaryStart);
blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryEnd - boldLength, boldLength));
} else {
// Normal grapheme: close a pending bold run, then extend the normal run.
if (boldLength) {
total += boldMetricsF.horizontalAdvance(content.mid(boundaryStart - boldLength, boldLength));
boldLength = 0;
blockLength = 0;
}
normalLength += (boundaryEnd - boundaryStart);
blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryEnd - normalLength, normalLength));
}
if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) {
// First line is full: break the line. On overshoot the current grapheme is pushed
// back so that it is processed again on the second line.
if (total + blockLength > LABEL_MAX_WIDTH) {
fm.toPreviousBoundary();
snippet.append("\n");
} else {
snippet.append(word).append("\n");
boundaryStart = boundaryEnd;
}
normalLength = 0;
boldLength = 0;
lineCount++;
total = 0;
blockLength = 0;
continue;
} else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) {
// Second line is full: finish the snippet.
qreal distance = 0;
qreal wordSize = 0;
if (total + blockLength > LABEL_MAX_WIDTH) {
boundaryEnd = boundaryStart;
fm.toPreviousBoundary();
} else {
snippet.append(word);
}
// Walk backwards until the removed graphemes are at least as wide as the literal
// measured below.
// NOTE(review): that literal (also appended after the loop) is an empty string as
// written here; it may originally have held an elision character — confirm.
while (wordSize < fontMetricsF.horizontalAdvance("")) {
boundaryStart = fm.position();
wordSize += keyword.toUpper().contains(content.mid(boundaryStart, boundaryEnd - boundaryStart).toUpper()) ?
boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart))
: fontMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart));
distance += (boundaryEnd - boundaryStart);
boundaryEnd = boundaryStart;
fm.toPreviousBoundary();
}
snippet = snippet.left(snippet.size() - distance);
snippet.append("");
break;
}
snippet.append(word);
boundaryStart = boundaryEnd;
}
} else {
// Backward pass: prepend graphemes starting from the end of content.
boundaryEnd = content.size();
for (fm.toEnd(); fm.position() != -1; fm.toPreviousBoundary()) {
boundaryStart = fm.position();
if (boundaryEnd == boundaryStart) {
continue;
}
QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);
if (keyword.toUpper().contains(word.toUpper())) {
// Keyword grapheme: close a pending normal run, then extend the bold run.
if (normalLength) {
total += fontMetricsF.horizontalAdvance(content.mid(boundaryEnd, normalLength));
normalLength = 0;
blockLength = 0;
}
boldLength += (boundaryEnd - boundaryStart);
blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boldLength));
} else {
// Normal grapheme: close a pending bold run, then extend the normal run.
if (boldLength) {
total += boldMetricsF.horizontalAdvance(content.mid(boundaryEnd, boldLength));
boldLength = 0;
blockLength = 0;
}
normalLength += (boundaryEnd - boundaryStart);
blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryStart, normalLength));
}
if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) {
// The line being filled first (the one closest to the end of content) is full:
// break the line. On overshoot the current grapheme is pushed back.
if (total + blockLength > LABEL_MAX_WIDTH) {
fm.toNextBoundary();
snippet.prepend("\n");
} else {
snippet.prepend(word).prepend("\n");
boundaryStart = boundaryEnd;
}
normalLength = 0;
boldLength = 0;
lineCount++;
total = 0;
blockLength = 0;
continue;
} else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) {
// The other line is full as well: finish the snippet.
qreal distance = 0;
qreal wordSize = 0;
if (total + blockLength > LABEL_MAX_WIDTH) {
boundaryStart = boundaryEnd;
fm.toNextBoundary();
} else {
snippet.prepend(word);
}
// Walk forwards until the removed graphemes are at least as wide as the literal
// measured below.
// NOTE(review): that literal (also prepended after the loop) is an empty string as
// written here; it may originally have held an elision character — confirm.
while (wordSize < fontMetricsF.horizontalAdvance("")) {
boundaryEnd = fm.position();
QString firstLetter = content.mid(boundaryStart, boundaryEnd - boundaryStart);
wordSize += keyword.toUpper().contains(firstLetter.toUpper()) ?
boldMetricsF.horizontalAdvance(firstLetter) : fontMetricsF.horizontalAdvance(firstLetter);
distance += (boundaryEnd - boundaryStart);
boundaryStart = boundaryEnd;
fm.toNextBoundary();
}
snippet = snippet.right(snippet.size() - distance);
snippet.prepend("");
break;
}
snippet.prepend(word);
boundaryEnd = boundaryStart;
}
}
return snippet;
}
bool FileUtils::isOpenXMLFileEncrypted(const QString &path)
{
QFile file(path);
file.open(QIODevice::ReadOnly|QIODevice::Text);
QByteArray encrypt = file.read(4);
file.close();
if (encrypt.length() < 4) {
qDebug() << "Reading file error!" << path;
return true;
}
//比较前四位是否为对应值来判断OpenXML类型文件是否加密
if ((encrypt[0] & 0x50) && (encrypt[1] & 0x4b) && (encrypt[2] & 0x03) && (encrypt[3] & 0x04)) {
return false;
} else {
qDebug() << "Encrypt!" << path;
return true;
}
}
//todo: only support docx, pptx, xlsx
bool FileUtils::isEncrypedOrUnsupport(const QString& path, const QString& suffix)
{
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
if(name == "application/zip") {
if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") {
return FileUtils::isOpenXMLFileEncrypted(path);
} else if (suffix == "uot" || suffix == "uos" || suffix == "uop") {
return false;
} else if (suffix == "ofd") {
return false;
} else {
return true;
}
} else if(name == "text/plain") {
if(suffix.endsWith("txt"))
return false;
return true;
} else if(name == "text/html") {
if(suffix.endsWith("html"))
return false;
return true;
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
if(suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") {
return false;
}
return true;
} else if(name == "application/pdf") {
if(suffix == "pdf")
return false;
return true;
} else if(name == "application/xml" || name == "application/uof") {
if(suffix == "uof") {
return false;
}
return true;
} else if (true == targetPhotographTypeMap[suffix]) {
return !isOcrSupportSize(path);
} else {
// qInfo() << "Unsupport format:[" << path << "][" << type.name() << "]";
return true;
}
}
/**
 * @brief Check that an image is large enough for OCR.
 *
 * The image is loaded with QImage; both its height and its width must be at least
 * OCR_MIN_SIZE pixels. (An earlier variant based on pixRead() was disabled.)
 *
 * @param path path of the image file
 * @return true when both dimensions reach OCR_MIN_SIZE, false otherwise
 */
bool FileUtils::isOcrSupportSize(QString path)
{
    const QImage image(path);
    // Limit on the pixel dimensions of the picture.
    const bool tooSmall = image.height() < OCR_MIN_SIZE || image.width() < OCR_MIN_SIZE;
    return !tooSmall;
}
/**
 * @brief Convert plain text to HTML with the keyword parts rendered bold.
 *
 * The text is walked grapheme by grapheme; consecutive graphemes that are contained in
 * @p keyword (case-insensitively) are wrapped in one <span>, which the leading <style>
 * block renders bold at the application font size + 2. All text is HTML-escaped and
 * line breaks become "<br />". A span that is still open when the text ends is closed,
 * so the generated markup is always balanced.
 *
 * @param text    plain text to convert
 * @param keyword keyword whose graphemes are highlighted
 * @return the HTML string, wrapped in <pre>...</pre>
 */
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{
    QString htmlString = QString("<style>"
                                 "span {"
                                 "font-size:%0pt;"
                                 "font-weight:bold;"
                                 "}"
                                 "</style>").arg(qApp->font().pointSizeF() + 2);
    const QString upperKeyword = keyword.toUpper();
    bool boldOpenned = false;
    QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, text);
    int start = 0;
    for (; bf.position() != -1; bf.toNextBoundary()) {
        const int end = bf.position();
        if (end == start) {
            continue;
        }
        const QString grapheme = text.mid(start, end - start);
        const bool isKeywordPart = upperKeyword.contains(grapheme.toUpper());
        if (isKeywordPart && !boldOpenned) {
            boldOpenned = true;
            htmlString.append(QString("<span>"));
        } else if (!isKeywordPart && boldOpenned) {
            boldOpenned = false;
            htmlString.append(QString("</span>"));
        }
        htmlString.append(FileUtils::escapeHtml(grapheme));
        start = end;
    }
    if (boldOpenned) {
        // The text ended inside a highlighted run: close it.
        htmlString.append(QString("</span>"));
    }
    htmlString.replace("\n", "<br />"); // replace line breaks
    return "<pre>" + htmlString + "</pre>";
}
/**
 * @brief Wrap a name in an <h3> heading with preformatted content.
 *
 * The name is HTML-escaped first. Note that the generated heading carries
 * "font-weight:normal".
 *
 * @param name text to wrap
 * @return the HTML string
 */
QString FileUtils::setAllTextBold(const QString &name)
{
    const QString headingTemplate("<h3 style=\"font-weight:normal;\"><pre>%1</pre></h3>");
    return headingTemplate.arg(escapeHtml(name));
}
/**
 * @brief Insert line breaks into text that is wider than LABEL_MAX_WIDTH.
 *
 * Widths are measured with the font metrics of @p p_label. When the whole text fits,
 * it is returned unchanged. Otherwise the text is scanned by character index and a
 * '\n' is inserted whenever the segment since the last break reaches (insert at the
 * current index) or exceeds (insert one index earlier) LABEL_MAX_WIDTH. At most two
 * line breaks are inserted.
 *
 * @param p_label label providing the font metrics
 * @param text    text to wrap
 * @return the text with the inserted line breaks
 */
QString FileUtils::wrapData(QLabel *p_label, const QString &text)
{
    QString wrapped = text;
    QFontMetrics metrics = p_label->fontMetrics();
    if (!(metrics.horizontalAdvance(wrapped) > LABEL_MAX_WIDTH)) {
        return wrapped;
    }

    int segmentStart = 0;
    int breaks = 0;
    for (int i = 0; i < wrapped.length(); i++) {
        const int segmentWidth = metrics.horizontalAdvance(wrapped.mid(segmentStart, i - segmentStart));
        if (segmentWidth < LABEL_MAX_WIDTH) {
            continue;
        }
        // Exact fit: break at i; overshoot: break one position earlier.
        const int breakAt = (segmentWidth == LABEL_MAX_WIDTH) ? i : i - 1;
        segmentStart = i;
        wrapped.insert(breakAt, '\n');
        breaks++;
        if (breaks == 2) {
            break;
        }
    }
    return wrapped;
}
/**
 * @brief Measure the width of text in which the keyword parts are rendered bold.
 *
 * The text is split into graphemes. Consecutive graphemes contained in @p keyword
 * (case-insensitively) form a bold run that is measured with the bold font (application
 * font size + 2); all other runs are measured with the normal application font. The
 * result is the sum of the widths of all runs.
 *
 * @param content text to measure
 * @param keyword keyword whose graphemes count as bold
 * @return the accumulated width
 */
qreal FileUtils::horizontalAdvanceContainsKeyword(const QString &content, const QString &keyword)
{
    const QFont appFont = qApp->font();

    QFont emphasisFont(appFont.family());
    emphasisFont.setPointSizeF(appFont.pointSizeF() + 2);
    emphasisFont.setWeight(QFont::Bold);
    QFontMetricsF emphasisMetrics(emphasisFont);

    QFont plainFont(appFont.family());
    plainFont.setPointSizeF(appFont.pointSizeF());
    QFontMetricsF plainMetrics(plainFont);

    const QString upperKeyword = keyword.toUpper();
    qreal width = 0;
    int pendingBold = 0;   // characters in the bold run that is not measured yet
    int pendingPlain = 0;  // characters in the normal run that is not measured yet
    int from = 0;
    QTextBoundaryFinder finder(QTextBoundaryFinder::Grapheme, content);
    for (; finder.position() != -1; finder.toNextBoundary()) {
        const int to = finder.position();
        if (to == from) {
            continue;
        }
        const int graphemeLength = to - from;
        const bool inKeyword = upperKeyword.contains(content.mid(from, graphemeLength).toUpper());
        if (inKeyword) {
            // A bold run starts or continues: measure the normal run that just ended.
            if (pendingPlain) {
                width += plainMetrics.horizontalAdvance(content.mid(from - pendingPlain, pendingPlain));
                pendingPlain = 0;
            }
            pendingBold += graphemeLength;
        } else {
            // A normal run starts or continues: measure the bold run that just ended.
            if (pendingBold) {
                width += emphasisMetrics.horizontalAdvance(content.mid(from - pendingBold, pendingBold));
                pendingBold = 0;
            }
            pendingPlain += graphemeLength;
        }
        from = to;
    }
    // Measure whichever run is still pending at the end of the text.
    if (pendingBold) {
        width += emphasisMetrics.horizontalAdvance(content.right(pendingBold));
    }
    if (pendingPlain) {
        width += plainMetrics.horizontalAdvance(content.right(pendingPlain));
    }
    return width;
}
/**
* uof1.0解析
* 参考规范GB/T 20916-2007
* 1.文字处理
* 2.电子表格
* 3.演示文稿
* ppt的内容存放在对象集中
* 可以通过演示文稿-主体-幻灯片集-幻灯片下的锚点属性获取引用了哪些内容:
* <uof:锚点 uof:图形引用="OBJ16"/>
* 目标:文本串
*/
void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
{
QFileInfo info(path);
if (!info.exists() || info.isDir()) {
return;
}
QFile file(path);
if (!file.open(QIODevice::ReadOnly)) {
return;
}
QDomDocument doc;
if (!doc.setContent(&file)) {
file.close();
return;
}
file.close();
bool isPPT = false;
QDomElement rootElem = doc.documentElement();
QDomNode node = rootElem.firstChild();
while (!node.isNull()) {
QDomElement e = node.toElement();
if (!e.isNull() && e.tagName() == "uof:演示文稿") {
isPPT = true;
break;
}
node = node.nextSibling();
}
//单独处理pdf文档
if (isPPT) {
qDebug() << path << "is PPT";
processUOFPPT(doc, textContent);
return;
}
file.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&file);
while (!reader.atEnd()) {
//适用于文字处理与电子表格
if (reader.readNextStartElement() && reader.name().toString() == "文本串") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
break;
}
}
}
file.close();
}
/**
* uof2.0解析
* @brief 参考规范文档 https://www.doc88.com/p-9089133923912.html 或 GJB/Z 165-2012
* ppt文档的内容存放在graphics.xml中需要先解析content中的引用再解析graphics内容
* @param path
* @param textContent
*/
void FileUtils::getUOF2TextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip file(path);
if (!file.open(QuaZip::mdUnzip))
return;
if (!file.setCurrentFile("content.xml")) {
return;
}
QuaZipFile fileR(&file);
if (!fileR.open(QIODevice::ReadOnly)) {
return;
}
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()) {
if (reader.readNextStartElement() && reader.name().toString() == "文本串_415B") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
break;
}
}
}
fileR.close();
file.close();
}
void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip zipFile(path);
QDomDocument doc;
if (!loadZipFileToDoc(zipFile, doc, "content.xml")) {
return;
}
QDomElement rootElem = doc.documentElement();
QList<QDomElement> nodes;
QQueue<QString> names; //每个节点的名称
names << "演:幻灯片集_6C0E" << "演:幻灯片_6C0F";
findNodes(rootElem, names, nodes);
if (nodes.empty()) {
return;
}
QStringList attrs;
for (const auto &node : nodes) {
names.clear();
names << "uof:锚点_C644";
findNodeAttr(node, names, "图形引用_C62E", attrs);
}
if (attrs.empty()) {
return;
}
if (!loadZipFileToDoc(zipFile, doc, "graphics.xml")) {
return;
}
nodes.clear();
names.clear();
names << "图:图形_8062";
rootElem = doc.documentElement();
findNodesByAttr(rootElem, names, nodes, "标识符_804B", attrs);
QList<QDomElement> nodes416B; //字:段落_416B
for (const auto &node : nodes) {
names.clear();
names << "图:文本_803C" << "图:内容_8043" << "字:段落_416B";
findNodes(node, names, nodes416B);
}
nodes.clear();
for (const auto &node : nodes416B) {
names.clear();
names << "字:句_419D";
findNodes(node, names, nodes); //所有的 字:句_419D
}
for (const auto &node : nodes) {
names.clear();
names << "字:文本串_415B";
if (findNodeText(node, names, textContent)) {
break;
}
}
}
/**
* OFD文件解析
* @brief 参考: GB/T 33190-2016
* @param path
* @param textContent
*/
void FileUtils::getOFDTextContent(const QString &path, QString &textContent)
{
QFileInfo info = QFileInfo(path);
if (!info.exists() || info.isDir())
return;
QuaZip zipfile(path);
if (!zipfile.open(QuaZip::mdUnzip))
return;
// GB/T 33190-2016规范定义可以存在多个Doc_x目录暂时只取第一个目录的内容
QString prefix("Doc_0/Pages/");
QStringList fileList;
for (const auto &file: zipfile.getFileNameList()) {
if (file.startsWith(prefix)) {
fileList << file;
}
}
for (int i = 0; i < fileList.count(); ++i) {
QString filename = prefix + "Page_" + QString::number(i) + "/Content.xml";
if (!zipfile.setCurrentFile(filename)) {
continue;
}
QuaZipFile fileR(&zipfile);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()) {
if (reader.readNextStartElement() && reader.name().toString() == "TextCode") {
textContent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if (textContent.length() >= MAX_CONTENT_LENGTH / 3) {
fileR.close();
zipfile.close();
return;
}
}
}
fileR.close();
}
zipfile.close();
}
/**
 * @brief Build a line-wrapped snippet from the beginning of a text, without highlighting.
 *
 * The text is walked grapheme by grapheme and measured with the application font. A
 * "\n" is inserted whenever the current line reaches LABEL_MAX_WIDTH, and a "\n" found
 * in the text also ends the current line. The walk stops after @p lineCount lines.
 *
 * @param content   text to build the snippet from
 * @param lineCount maximum number of lines in the snippet
 * @return the snippet text
 */
QString FileUtils::getSnippetWithoutKeyword(const QString &content, int lineCount) {
QString snippet;
int numOfLine = 0; // number of lines completed so far
QFont font(qApp->font().family());
font.setPointSizeF(qApp->font().pointSizeF());
QFontMetricsF fontMetricsF(font);
qreal length = 0; // measured width of the current line
int wordCount = 0; // number of characters on the current line
int boundaryStart = 0;
int boundaryEnd = 0;
QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content);
for(;fm.position() != -1;fm.toNextBoundary()) {
boundaryEnd = fm.position();
if (boundaryEnd == boundaryStart) {
continue;
}
if (numOfLine == lineCount) {
break;
}
QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);
wordCount += boundaryEnd - boundaryStart;
length = fontMetricsF.horizontalAdvance(content.mid(boundaryEnd - wordCount, wordCount));
if (length >= LABEL_MAX_WIDTH || word == "\n") {
// The current line ends here: explicit line break, overshoot (the grapheme is pushed
// back and processed again on the next line), or exact fit (the grapheme is kept).
if (word == "\n") {
boundaryStart = boundaryEnd;
} else if (length > LABEL_MAX_WIDTH) {
fm.toPreviousBoundary();
} else {
boundaryStart = boundaryEnd;
snippet.append(word);
}
snippet.append("\n");
numOfLine++;
if (numOfLine == lineCount) {
// Last allowed line: trim the end of the snippet and stop.
qreal distance = 1;// the last character is always "\n"
qreal wordSize = 0;
if (!(word == "\n" && length < LABEL_MAX_WIDTH)) {
if (length > LABEL_MAX_WIDTH) {
boundaryEnd = boundaryStart;
}
// Walk backwards until the removed graphemes are at least as wide as the literal
// measured below.
// NOTE(review): that literal (also appended after the loop) is an empty string as
// written here; it may originally have held an elision character — confirm.
while (wordSize < fontMetricsF.horizontalAdvance("")) {
boundaryStart = fm.position();
wordSize += fontMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart));
distance += (boundaryEnd - boundaryStart);
boundaryEnd = boundaryStart;
fm.toPreviousBoundary();
}
}
snippet = snippet.left(snippet.size() - distance);
snippet.append("");
break;
}
length = 0;
wordCount = 0;
continue;
}
snippet.append(word);
boundaryStart = boundaryEnd;
}
return snippet;
}