/*
 * Copyright (C) 2020, KylinSoft Co., Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Authors: zhangpengfei
 * Modified by: zhangzihao
 * Modified by: zhangjiaping
 *
 */
#include "file-utils.h"

// NOTE(review): the angle-bracket header names were missing from the copy of
// this file under review. The list below is rebuilt from the symbols this
// translation unit uses -- confirm it against version control before merging.
#include <memory>
#include <string>

#include <QApplication>
#include <QByteArray>
#include <QClipboard>
#include <QCryptographicHash>
#include <QDBusConnection>
#include <QDBusError>
#include <QDBusInterface>
#include <QDBusMessage>
#include <QDBusReply>
#include <QDebug>
#include <QDesktopServices>
#include <QDomDocument>
#include <QFile>
#include <QFileInfo>
#include <QFont>
#include <QFontMetrics>
#include <QFontMetricsF>
#include <QIcon>
#include <QImage>
#include <QLabel>
#include <QList>
#include <QMimeDatabase>
#include <QMimeType>
#include <QQueue>
#include <QStandardPaths>
#include <QString>
#include <QStringList>
#include <QTextBoundaryFinder>
#include <QTextCodec>
#include <QTextStream>
#include <QUrl>
#include <QXmlStreamReader>

#include <gio/gio.h>
#include <gio/gdesktopappinfo.h>
#include <poppler/qt5/poppler-qt5.h>
#include <quazip5/quazip.h>
#include <quazip5/quazipfile.h>
#include <uchardet/uchardet.h>

#include "gobject-template.h"
#include "hanzi-to-pinyin.h"
#include "common.h"
#include "icon-loader.h"

using namespace UkuiSearch;

#define MAX_CONTENT_LENGTH 20480000

/**
 * @brief Streams an XML document from @p device and appends the text of every
 *        element whose name equals @p tagName to @p out.
 *
 * Line feeds are removed from the element text and carriage returns are
 * replaced by a space before appending.
 *
 * @param device  an already opened, readable device
 * @param tagName element name as reported by QXmlStreamReader::name()
 * @param out     receives the extracted text (appended)
 * @return true as soon as @p out holds at least MAX_CONTENT_LENGTH / 3
 *         characters (reading stops there), false when the document ended first
 */
static bool appendElementText(QIODevice *device, const QString &tagName, QString &out)
{
    QXmlStreamReader reader(device);
    while (!reader.atEnd()) {
        if (reader.readNextStartElement() && reader.name().toString() == tagName) {
            out.append(reader.readElementText().replace("\n", "").replace("\r", " "));
            if (out.length() >= MAX_CONTENT_LENGTH / 3) {
                return true;
            }
        }
    }
    return false;
}

/**
 * @brief Looks up descendants of @p elem by following a chain of tag names.
 *
 * The queue is consumed while descending. For every name but the last, only
 * the first matching child is followed; for the last name, every matching
 * child of the reached element is appended to @p nodes.
 *
 * @param elem  element to start from
 * @param names chain of tag names (consumed by this call)
 * @param nodes receives all matches (appended)
 */
void findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
{
    // dequeue() on an empty queue is invalid; nothing to look for in that case.
    if (names.isEmpty()) {
        return;
    }

    const QString targetName = names.dequeue();
    for (QDomNode node = elem.firstChild(); !node.isNull(); node = node.nextSibling()) {
        const QDomElement e = node.toElement();
        if (e.isNull() || e.tagName() != targetName) {
            continue;
        }
        if (names.empty()) {
            nodes.append(e);
        } else {
            // The rest of the chain is consumed by the recursive call, so
            // further siblings at this level cannot be searched any more.
            findNodes(e, names, nodes);
            break;
        }
    }
}

/**
 * @brief Same lookup as findNodes(), afterwards every entry of @p nodes that
 *        lacks attribute @p attr or whose value is not listed in @p values is
 *        removed (entries already present in @p nodes are filtered as well).
 */
void findNodesByAttr(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes,
                     const QString &attr, const QStringList &values)
{
    findNodes(elem, names, nodes);

    auto it = nodes.begin();
    while (it != nodes.end()) {
        if (it->hasAttribute(attr) && values.contains(it->attribute(attr))) {
            ++it;
        } else {
            it = nodes.erase(it);
        }
    }
}

/**
 * @brief Appends the text of every node found via findNodes() to @p content.
 * @return true once @p content holds at least MAX_CONTENT_LENGTH / 3 characters
 */
bool findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
{
    QList<QDomElement> nodes;
    findNodes(elem, names, nodes);

    for (const auto &node : nodes) {
        content.append(node.text());
        if (content.length() >= MAX_CONTENT_LENGTH / 3) {
            return true;
        }
    }
    return false;
}

/**
 * @brief Collects the value of attribute @p attr from every node found via
 *        findNodes() that carries it.
 */
void findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
{
    QList<QDomElement> nodes;
    findNodes(elem, names, nodes);

    for (const auto &node : nodes) {
        if (node.hasAttribute(attr)) {
            attrs.append(node.attribute(attr));
        }
    }
}

/**
 * @brief Extracts the text of a UOF 1.0 presentation.
 *
 * Slides reference graphics through anchors; the referenced graphics in the
 * object set hold the paragraphs, sentences and finally the text strings.
 */
void processUOFPPT(const QDomDocument &doc, QString &content)
{
    QDomElement rootElem = doc.documentElement();
    QList<QDomElement> nodes;
    QQueue<QString> names; // tag name chain for the next lookup

    names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
    findNodes(rootElem, names, nodes);
    if (nodes.empty()) {
        // TODO: when a uof presentation has no anchor nodes, search the text nodes directly?
        return;
    }

    // every slide -> anchor -> referenced graphic id
    QStringList objs;
    for (const auto &node : nodes) {
        names.clear();
        names << "uof:锚点";
        findNodeAttr(node, names, "uof:图形引用", objs);
    }

    nodes.clear();
    names.clear();
    names << "uof:对象集" << "图:图形";
    findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
    if (nodes.empty()) {
        return;
    }

    // all paragraph nodes
    QList<QDomElement> paraNodes;
    for (const auto &node : nodes) {
        names.clear();
        names << "图:文本内容" << "字:段落";
        findNodes(node, names, paraNodes);
    }

    // all sentence nodes of all paragraphs
    nodes.clear();
    for (const auto &node : paraNodes) {
        names.clear();
        names << "字:句";
        findNodes(node, names, nodes);
    }

    for (const auto &node : nodes) {
        names.clear();
        names << "字:文本串";
        if (findNodeText(node, names, content)) {
            break;
        }
    }
}

/**
 * @brief Parses archive member @p fileName of @p zipFile into @p doc.
 *
 * Opens the archive for reading when it is not open yet; the archive is left
 * open for the caller.
 *
 * @return false when the archive or member cannot be opened or parsed
 */
bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    if (!zipFile.isOpen() && !zipFile.open(QuaZip::mdUnzip)) {
        return false;
    }
    if (!zipFile.setCurrentFile(fileName)) {
        return false;
    }

    QuaZipFile file(&zipFile);
    if (!file.open(QIODevice::ReadOnly)) {
        return false;
    }

    doc.clear();
    const bool parsed = doc.setContent(&file);
    file.close();
    return parsed;
}

FileUtils::FileUtils()
{
}

/**
 * @brief Returns the hex-encoded MD5 digest of the UTF-8 encoded @p path.
 */
std::string FileUtils::makeDocUterm(QString path)
{
    return QCryptographicHash::hash(path.toUtf8(), QCryptographicHash::Md5).toHex().toStdString();
}

/**
 * @brief FileUtils::getFileIcon looks up the icon of a file
 * @param uri in the form "file:///home/xxx/xxx/xxxx.txt"
 * @param checkValid unused
 * @return the first loadable themed icon, otherwise the "unknown" icon
 */
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid)
{
    Q_UNUSED(checkValid)

    auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
    auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                G_FILE_ATTRIBUTE_STANDARD_ICON,
                                                G_FILE_QUERY_INFO_NONE,
                                                nullptr,
                                                nullptr));
    if (!G_IS_FILE_INFO(info.get()->get())) {
        return IconLoader::loadIconQt("unknown", QIcon(":/res/icons/unknown.svg"));
    }

    // The GIcon belongs to the file info; do not unref it.
    GIcon *g_icon = g_file_info_get_icon(info.get()->get());

    // Only themed icons carry a name list; other GIcon kinds must not be cast.
    if (G_IS_THEMED_ICON(g_icon)) {
        const gchar *const *icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
        for (auto p = icon_names; p && *p; ++p) {
            QIcon icon = IconLoader::loadIconQt(*p);
            if (!icon.isNull()) {
                return icon;
            }
        }
    }
    return IconLoader::loadIconQt("unknown", QIcon(":/res/icons/unknown.svg"));
}

/**
 * @brief Returns the icon of the control center application.
 */
QIcon FileUtils::getSettingIcon()
{
    return IconLoader::loadIconQt("ukui-control-center", QIcon(":/res/icons/ukui-control-center.svg"));
}

/**
 * @brief FileUtils::getFileName returns the file name
 * @param uri documented as "file:///home/xxx/xxx/xxxx.txt"
 * @return the file name, or "Unknown File" when the file does not exist
 *
 * NOTE(review): the argument is handed to QFileInfo unchanged, which expects a
 * file system path rather than a "file://" URI -- confirm what callers pass.
 */
QString FileUtils::getFileName(const QString &uri)
{
    QFileInfo info(uri);
    if (!info.exists()) {
        return "Unknown File";
    }
    return info.fileName();
}

/**
 * @brief FileUtils::getAppName returns the localized application name
 * @param path full path of the .desktop file
 * @return the localized "Name" entry, or "Unknown App" when the file cannot be loaded
 */
QString FileUtils::getAppName(const QString &path)
{
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, path.toUtf8().constData(), G_KEY_FILE_NONE, nullptr)) {
        g_key_file_free(keyfile);
        return "Unknown App";
    }

    // The returned string is newly allocated and owned by us.
    gchar *localizedName = g_key_file_get_locale_string(keyfile,
                                                        G_KEY_FILE_DESKTOP_GROUP,
                                                        G_KEY_FILE_DESKTOP_KEY_NAME,
                                                        nullptr,
                                                        nullptr);
    const QString name = QString::fromUtf8(localizedName);
    g_free(localizedName);
    g_key_file_free(keyfile);
    return name;
}

/**
 * @brief FileUtils::getSettingName returns the name of a settings entry
 * @param setting in the form About/About->Properties
 * @return everything after the last '/'
 */
QString FileUtils::getSettingName(const QString &setting)
{
    return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
}

/**
 * @brief Tells whether @p pathA equals @p pathB or lies below it.
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    if (pathB == "/") {
        return true;
    }
    if (pathA.length() < pathB.length()) {
        return false;
    }
    return pathA == pathB || pathA.startsWith(pathB + "/");
}

/**
 * @brief Determines the MIME type of @p path from the file content.
 */
QMimeType FileUtils::getMimetype(const QString &path)
{
    QMimeDatabase mdb;
    return mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
}

/**
 * @brief Converts @p hanzi via HanZiToPinYin and returns two strings: the
 *        joined conversion results, and the first character of each result.
 */
QStringList FileUtils::findMultiToneWords(const QString &hanzi)
{
    QStringList results;
    HanZiToPinYin::getInstance()->getResults(hanzi.toStdString(), results);

    QString firstLetter;
    for (const QString &info : results) {
        if (!info.isEmpty()) {
            firstLetter += info.at(0);
        }
    }

    QStringList output;
    output << results.join("") << firstLetter;
    return output;
}

/**
 * @brief FileUtils::getDocxTextContent appends the text of a .docx file
 * @param path absolute path
 * @param textcontent receives the text of all "t" elements of word/document.xml
 */
void FileUtils::getDocxTextContent(const QString &path, QString &textcontent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }
    if (!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
        file.close();
        return;
    }

    // The document is streamed instead of loaded into a DOM tree.
    QuaZipFile fileR(&file);
    if (fileR.open(QIODevice::ReadOnly)) {
        appendElementText(&fileR, "t", textcontent);
        fileR.close();
    }
    file.close();
}

/**
 * @brief Appends the text of a .pptx file.
 *
 * Slides are read as ppt/slides/slide<N>.xml with N starting at 1; reading
 * stops once the size limit (MAX_CONTENT_LENGTH / 3 characters) is reached.
 */
void FileUtils::getPptxTextContent(const QString &path, QString &textcontent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }

    const QString prefix("ppt/slides/slide");
    int slideCount = 0;
    const QStringList entries = file.getFileNameList();
    for (const QString &entry : entries) {
        if (entry.startsWith(prefix)) {
            ++slideCount;
        }
    }

    for (int i = 0; i < slideCount; ++i) {
        const QString name = prefix + QString::number(i + 1) + ".xml";
        if (!file.setCurrentFile(name)) {
            continue;
        }
        QuaZipFile fileR(&file);
        if (!fileR.open(QIODevice::ReadOnly)) {
            continue;
        }
        const bool limitReached = appendElementText(&fileR, "t", textcontent);
        fileR.close();
        if (limitReached) {
            // Enough text collected; do not read the remaining slides.
            break;
        }
    }
    file.close();
}

/**
 * @brief Appends the text of all "t" elements of xl/sharedStrings.xml of a
 *        .xlsx file.
 */
void FileUtils::getXlsxTextContent(const QString &path, QString &textcontent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }
    if (!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
        file.close();
        return;
    }

    QuaZipFile fileR(&file);
    if (fileR.open(QIODevice::ReadOnly)) {
        appendElementText(&fileR, "t", textcontent);
        fileR.close();
    }
    file.close();
}

/**
 * @brief Appends the text of every page of a PDF file; nothing is appended
 *        when the document cannot be loaded or is locked.
 */
void FileUtils::getPdfTextContent(const QString &path, QString &textcontent)
{
    // load() yields a null pointer when the file cannot be opened or parsed.
    std::unique_ptr<Poppler::Document> doc(Poppler::Document::load(path));
    if (!doc || doc->isLocked()) {
        return;
    }

    const QRectF wholePage; // a null rectangle is passed to Poppler::Page::text()
    const int pageNum = doc->numPages();
    for (int i = 0; i < pageNum; ++i) {
        std::unique_ptr<Poppler::Page> page(doc->page(i));
        if (!page) {
            continue;
        }
        textcontent.append(page->text(wholePage).replace("\n", "").replace("\r", " "));
        if (textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
            break;
        }
    }
}

/**
 * @brief Reads up to MAX_CONTENT_LENGTH bytes of a text file, detects the
 *        encoding with uchardet and stores the decoded text in @p textcontent
 *        (replacing its previous value).
 */
void FileUtils::getTxtContent(const QString &path, QString &textcontent)
{
    QFile file(path);
    if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
        return;
    }
    const QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
    file.close();

    uchardet_t chardet = uchardet_new();
    if (uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0) {
        qWarning() << "Txt file encoding format detect fail!" << path;
    }
    uchardet_data_end(chardet);
    // Copy the charset name so it does not depend on the detector's lifetime,
    // then release the detector on every path.
    const QByteArray codec(uchardet_get_charset(chardet));
    uchardet_delete(chardet);

    if (QTextCodec::codecForName(codec) == nullptr) {
        qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(codec);
        return;
    }

    QTextStream stream(encodedString, QIODevice::ReadOnly);
    stream.setCodec(codec.constData());
    textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
}

/**
 * @brief Opens a file, or shows it in the file manager.
 * @param path      local path of the file
 * @param openInDir true: ask org.freedesktop.FileManager1 to show the item;
 *                  false: open the file with an application
 * @return 0 on success, -1 on failure
 */
int FileUtils::openFile(QString &path, bool openInDir)
{
    if (openInDir) {
        QStringList list;
        list.append(path);
        QDBusMessage message = QDBusMessage::createMethodCall("org.freedesktop.FileManager1",
                                                              "/org/freedesktop/FileManager1",
                                                              "org.freedesktop.FileManager1",
                                                              "ShowItems");
        message.setArguments({list, "ukui-search"});
        const QDBusMessage messageRes = QDBusConnection::sessionBus().call(message);
        // Inspect the type of the reply that was actually received.
        if (messageRes.type() == QDBusMessage::ReplyMessage) {
            return 0;
        }
        qDebug() << "Error! QDBusMessage reply error! ReplyMessage:" << messageRes.errorMessage();
        return -1;
    }

    const QString fileUrl = QUrl::fromLocalFile(path).toString();
    auto file = wrapGFile(g_file_new_for_uri(fileUrl.toUtf8().constData()));
    auto fileInfo = wrapGFileInfo(g_file_query_info(file.get()->get(),
                                                    "standard::*," "time::*," "access::*," "mountable::*," "metadata::*," "trash::*," G_FILE_ATTRIBUTE_ID_FILE,
                                                    G_FILE_QUERY_INFO_NONE,
                                                    nullptr,
                                                    nullptr));
    if (!G_IS_FILE_INFO(fileInfo.get()->get())) {
        // The query failed; without file info there is no content type to look up.
        return -1;
    }

    QString mimeType = g_file_info_get_content_type(fileInfo.get()->get());
    if (mimeType.isEmpty()
            && g_file_info_has_attribute(fileInfo.get()->get(), "standard::fast-content-type")) {
        mimeType = g_file_info_get_attribute_string(fileInfo.get()->get(), "standard::fast-content-type");
    }
    const QByteArray mimeTypeUtf8 = mimeType.toUtf8();

    /*
     * g_app_info_get_default_for_type function get wrong default app, so we get the
     * default app info from mimeapps.list, and chose the right default app for mimeType file
     */
    GError *error = nullptr;
    GAppInfo *info = nullptr;
    const QString mimeAppsListPath = QStandardPaths::writableLocation(QStandardPaths::HomeLocation)
                                     + "/.config/mimeapps.list";
    GKeyFile *keyfile = g_key_file_new();
    if (!g_key_file_load_from_file(keyfile, mimeAppsListPath.toUtf8().constData(), G_KEY_FILE_NONE, &error)) {
        qWarning() << "load mimeapps list error msg" << (error ? error->message : "");
        info = g_app_info_get_default_for_type(mimeTypeUtf8.constData(), false);
    } else {
        gchar *desktopApp = g_key_file_get_string(keyfile, "Default Applications", mimeTypeUtf8.constData(), &error);
        if (desktopApp != nullptr) {
            info = reinterpret_cast<GAppInfo *>(g_desktop_app_info_new(desktopApp));
            g_free(desktopApp);
        } else {
            info = g_app_info_get_default_for_type(mimeTypeUtf8.constData(), false);
        }
    }
    // Releases the error from either branch (no-op when none was set).
    g_clear_error(&error);
    g_key_file_free(keyfile);

    if (!G_IS_APP_INFO(info)) {
        return -1;
    }

    bool isSuccess = false;
    QDBusInterface appLaunchInterface("com.kylin.AppManager",
                                      "/com/kylin/AppManager",
                                      "com.kylin.AppManager",
                                      QDBusConnection::sessionBus());
    if (!appLaunchInterface.isValid()) {
        qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
    } else {
        appLaunchInterface.setTimeout(10000);
        QDBusReply<bool> reply = appLaunchInterface.call("LaunchDefaultAppWithUrl", fileUrl);
        if (reply.isValid()) {
            isSuccess = reply.value();
        } else {
            qWarning() << "SoftWareCenter dbus called failed!";
        }
    }

    // Fall back to Qt when the app manager is unavailable or refused.
    if (!isSuccess) {
        QDesktopServices::openUrl(QUrl::fromLocalFile(path));
    }

    g_object_unref(info);
    return 0;
}

/**
 * @brief Puts @p path on the clipboard.
 * @return always true
 */
bool FileUtils::copyPath(QString &path)
{
    QApplication::clipboard()->setText(path);
    return true;
}

/**
 * @brief Replaces '<' and '>' by their HTML entities.
 *
 * NOTE(review): in the copy under review both replacements mapped a character
 * to itself (the entities appear to have been decoded); "&lt;" / "&gt;" are
 * restored here -- confirm against version control.
 */
QString FileUtils::escapeHtml(const QString &str)
{
    QString temp = str;
    temp.replace("<", "&lt;");
    temp.replace(">", "&gt;");
    return temp;
}

/**
 * @brief Builds a snippet of at most two display lines around a keyword hit.
 *
 * Widths are measured per grapheme; graphemes contained in @p keyword
 * (case-insensitive) are measured with the bold font (application font size
 * + 2pt), everything else with the application font. A line break is inserted
 * once a line reaches LABEL_MAX_WIDTH and the second line is truncated with
 * an ellipsis.
 *
 * @param myStr   source text
 * @param start   offset into @p myStr where the hit starts
 * @param keyword keyword to highlight
 */
QString FileUtils::getSnippet(const std::string &myStr, uint start, const QString &keyword)
{
    QFont boldFont(qApp->font().family());
    boldFont.setPointSizeF(qApp->font().pointSizeF() + 2);
    boldFont.setWeight(QFont::Bold);
    QFontMetricsF boldMetricsF(boldFont);

    uint strLength = 240;
    bool elideLeft(false);
    std::string sub = myStr.substr(start, strLength);
    QString content = QString::fromStdString(sub);

    // Not enough text after the hit: extend the window towards the front.
    if (start + strLength > myStr.length()) {
        // new start position
        int newStart = myStr.length() - strLength;
        if (myStr.length() < strLength) {
            newStart = 0;
            sub = myStr;
        } else {
            sub = myStr.substr(newStart, strLength);
        }

        // NOTE(review): the first argument below adds a qreal to a QString, and
        // substr() receives `start` as a length. Kept exactly as found because
        // the intent cannot be established from this file -- please confirm.
        if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(newStart, start)) + boldMetricsF.horizontalAdvance(keyword), keyword) > 2 * LABEL_MAX_WIDTH) {
            if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(start)), keyword) <= 2 * LABEL_MAX_WIDTH) {
                elideLeft = true;
            } else {
                sub = myStr.substr(start);
            }
        }
        content = QString::fromStdString(sub);
    }

    QFont font(qApp->font().family());
    font.setPointSizeF(qApp->font().pointSizeF());
    QFontMetricsF fontMetricsF(font);

    qreal blockLength = 0;  // width of the current run (bold or normal)
    qreal total = 0;        // width of the completed runs of the current line
    int lineCount = 0;
    int normalLength = 0;   // characters in the current normal run
    int boldLength = 0;     // characters in the current bold run

    QString snippet;
    int boundaryStart = 0;
    int boundaryEnd = 0;
    QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content);

    if (!elideLeft) {
        // Build the snippet front to back; the ellipsis goes at the end.
        for (; fm.position() != -1; fm.toNextBoundary()) {
            boundaryEnd = fm.position();
            QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);
            if (boundaryStart == boundaryEnd) {
                continue;
            }

            if (keyword.toUpper().contains(word.toUpper())) {
                if (normalLength) {
                    total += fontMetricsF.horizontalAdvance(content.mid(boundaryStart - normalLength, normalLength));
                    normalLength = 0;
                    blockLength = 0;
                }
                boldLength += (boundaryEnd - boundaryStart);
                blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryEnd - boldLength, boldLength));
            } else {
                if (boldLength) {
                    total += boldMetricsF.horizontalAdvance(content.mid(boundaryStart - boldLength, boldLength));
                    boldLength = 0;
                    blockLength = 0;
                }
                normalLength += (boundaryEnd - boundaryStart);
                blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryEnd - normalLength, normalLength));
            }

            if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) {
                // First line is full: break the line.
                if (total + blockLength > LABEL_MAX_WIDTH) {
                    // The grapheme does not fit; revisit it on the next line.
                    fm.toPreviousBoundary();
                    snippet.append("\n");
                } else {
                    snippet.append(word).append("\n");
                    boundaryStart = boundaryEnd;
                }
                normalLength = 0;
                boldLength = 0;
                lineCount++;
                total = 0;
                blockLength = 0;
                continue;
            } else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) {
                // Second line is full: drop trailing graphemes until the
                // ellipsis fits, then stop.
                qreal distance = 0;
                qreal wordSize = 0;
                if (total + blockLength > LABEL_MAX_WIDTH) {
                    boundaryEnd = boundaryStart;
                    fm.toPreviousBoundary();
                } else {
                    snippet.append(word);
                }
                while (wordSize < fontMetricsF.horizontalAdvance("…")) {
                    boundaryStart = fm.position();
                    wordSize += keyword.toUpper().contains(content.mid(boundaryStart, boundaryEnd - boundaryStart).toUpper())
                                ? boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart))
                                : fontMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart));
                    distance += (boundaryEnd - boundaryStart);
                    boundaryEnd = boundaryStart;
                    fm.toPreviousBoundary();
                }
                snippet = snippet.left(snippet.size() - distance);
                snippet.append("…");
                break;
            }

            snippet.append(word);
            boundaryStart = boundaryEnd;
        }
    } else {
        // Build the snippet back to front; the ellipsis goes at the start.
        boundaryEnd = content.size();
        for (fm.toEnd(); fm.position() != -1; fm.toPreviousBoundary()) {
            boundaryStart = fm.position();
            if (boundaryEnd == boundaryStart) {
                continue;
            }
            QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);

            if (keyword.toUpper().contains(word.toUpper())) {
                if (normalLength) {
                    total += fontMetricsF.horizontalAdvance(content.mid(boundaryEnd, normalLength));
                    normalLength = 0;
                    blockLength = 0;
                }
                boldLength += (boundaryEnd - boundaryStart);
                blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boldLength));
            } else {
                if (boldLength) {
                    total += boldMetricsF.horizontalAdvance(content.mid(boundaryEnd, boldLength));
                    boldLength = 0;
                    blockLength = 0;
                }
                normalLength += (boundaryEnd - boundaryStart);
                blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryStart, normalLength));
            }

            if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) {
                if (total + blockLength > LABEL_MAX_WIDTH) {
                    fm.toNextBoundary();
                    snippet.prepend("\n");
                } else {
                    snippet.prepend(word).prepend("\n");
                    boundaryStart = boundaryEnd;
                }
                normalLength = 0;
                boldLength = 0;
                lineCount++;
                total = 0;
                blockLength = 0;
                continue;
            } else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) {
                qreal distance = 0;
                qreal wordSize = 0;
                if (total + blockLength > LABEL_MAX_WIDTH) {
                    boundaryStart = boundaryEnd;
                    fm.toNextBoundary();
                } else {
                    snippet.prepend(word);
                }
                while (wordSize < fontMetricsF.horizontalAdvance("…")) {
                    boundaryEnd = fm.position();
                    QString firstLetter = content.mid(boundaryStart, boundaryEnd - boundaryStart);
                    wordSize += keyword.toUpper().contains(firstLetter.toUpper())
                                ? boldMetricsF.horizontalAdvance(firstLetter)
                                : fontMetricsF.horizontalAdvance(firstLetter);
                    distance += (boundaryEnd - boundaryStart);
                    boundaryStart = boundaryEnd;
                    fm.toNextBoundary();
                }
                snippet = snippet.right(snippet.size() - distance);
                snippet.prepend("…");
                break;
            }

            snippet.prepend(word);
            boundaryEnd = boundaryStart;
        }
    }
    return snippet;
}

/**
 * @brief Treats an OpenXML file as encrypted unless it starts with the ZIP
 *        local-file-header signature "PK\x03\x04".
 * @return true when the signature is missing or the file cannot be read
 */
bool FileUtils::isOpenXMLFileEncrypted(const QString &path)
{
    QFile file(path);
    file.open(QIODevice::ReadOnly);
    const QByteArray header = file.read(4);
    file.close();

    if (header.length() < 4) {
        qDebug() << "Reading file error!" << path;
        return true;
    }

    // The four bytes must be equal to the signature; a bitwise AND would
    // accept any bytes that merely share a bit with it.
    if (header == QByteArray("PK\x03\x04", 4)) {
        return false;
    }
    qDebug() << "Encrypt!" << path;
    return true;
}

/**
 * @brief Tells whether the content of a file cannot be extracted, judged by
 *        its detected MIME type and the given suffix.
 * @return false when MIME type and suffix form a supported combination (and,
 *         for OpenXML files, the file is not encrypted; for images, the size
 *         is sufficient for OCR); true otherwise
 */
// todo: only support docx, pptx, xlsx
bool FileUtils::isEncrypedOrUnsupport(const QString &path, const QString &suffix)
{
    const QMimeType type = FileUtils::getMimetype(path);
    const QString name = type.name();

    if (name == "application/zip") {
        if (suffix == "docx" || suffix == "pptx" || suffix == "xlsx") {
            return FileUtils::isOpenXMLFileEncrypted(path);
        }
        if (suffix == "uot" || suffix == "uos" || suffix == "uop") {
            return false;
        }
        if (suffix == "ofd") {
            return false;
        }
        return true;
    }
    if (name == "text/plain") {
        return !suffix.endsWith("txt");
    }
    if (name == "text/html") {
        return !suffix.endsWith("html");
    }
    if (type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
        if (suffix == "doc" || suffix == "dot" || suffix == "wps" || suffix == "ppt" ||
                suffix == "pps" || suffix == "dps" || suffix == "et" || suffix == "xls" || suffix == "uof") {
            return false;
        }
        return true;
    }
    if (name == "application/pdf") {
        return suffix != "pdf";
    }
    if (name == "application/xml" || name == "application/uof") {
        return suffix != "uof";
    }
    if (true == targetPhotographTypeMap[suffix]) {
        return !isOcrSupportSize(path);
    }
    return true;
}

/**
 * @brief Tells whether an image is at least OCR_MIN_SIZE pixels in both
 *        dimensions.
 */
bool FileUtils::isOcrSupportSize(QString path)
{
    QImage file(path);
    // limit the pixel size of the image
    if (file.height() < OCR_MIN_SIZE || file.width() < OCR_MIN_SIZE) {
        return false;
    }
    return true;
}

/**
 * @brief Converts @p text to rich text in which every run of graphemes that
 *        is contained in @p keyword (case-insensitive) is emphasized.
 *
 * NOTE(review): the markup inside the string literals of this function was
 * missing from the copy under review (the literals were empty or contained
 * only a line break). The tags below are a reconstruction -- confirm them
 * against version control.
 */
QString FileUtils::getHtmlText(const QString &text, const QString &keyword)
{
    QString htmlString = QString("<style>"
                                 "  span {"
                                 "    font-size: %1pt;"
                                 "  }"
                                 "</style>").arg(qApp->font().pointSizeF() + 2);
    bool boldOpenned = false;

    QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, text);
    int start = 0;
    for (; bf.position() != -1; bf.toNextBoundary()) {
        const int end = bf.position();
        if (end == start) {
            continue;
        }

        const QString grapheme = text.mid(start, end - start);
        const bool highlighted = keyword.toUpper().contains(grapheme.toUpper());
        if (highlighted && !boldOpenned) {
            boldOpenned = true;
            htmlString.append(QString("<span><b>"));
        } else if (!highlighted && boldOpenned) {
            boldOpenned = false;
            htmlString.append(QString("</b></span>"));
        }
        htmlString.append(FileUtils::escapeHtml(grapheme));
        start = end;
    }
    // Close a highlight that runs up to the end of the text.
    if (boldOpenned) {
        htmlString.append(QString("</b></span>"));
    }

    htmlString.replace("\n", "<br />"); // replace line breaks
    return "<pre>" + htmlString + "</pre>";
}

/**
 * @brief Wraps the escaped @p name in heading markup.
 *
 * NOTE(review): the markup of this literal was missing from the copy under
 * review (only line breaks around %1 remained); reconstructed -- confirm
 * against version control.
 */
QString FileUtils::setAllTextBold(const QString &name)
{
    return QString("<h3 style=\"font-weight:normal;\"><pre>%1</pre></h3>").arg(escapeHtml(name));
}

/**
 * @brief Inserts up to two line breaks into @p text so that each line fits
 *        LABEL_MAX_WIDTH when rendered with the font of @p p_label.
 */
QString FileUtils::wrapData(QLabel *p_label, const QString &text)
{
    QString wrapText = text;
    QFontMetrics fontMetrics = p_label->fontMetrics();
    const int textSize = fontMetrics.horizontalAdvance(wrapText);

    if (textSize > LABEL_MAX_WIDTH) {
        int lastIndex = 0;
        int count = 0;
        for (int i = lastIndex; i < wrapText.length(); i++) {
            const int lineWidth = fontMetrics.horizontalAdvance(wrapText.mid(lastIndex, i - lastIndex));
            if (lineWidth == LABEL_MAX_WIDTH) {
                lastIndex = i;
                wrapText.insert(i, '\n');
                count++;
            } else if (lineWidth > LABEL_MAX_WIDTH) {
                lastIndex = i;
                wrapText.insert(i - 1, '\n');
                count++;
            } else {
                continue;
            }
            if (count == 2) {
                break;
            }
        }
    }
    return wrapText;
}

/**
 * @brief Measures the display width of @p content, using the bold font
 *        (application font size + 2pt) for graphemes contained in @p keyword
 *        (case-insensitive) and the application font for everything else.
 */
qreal FileUtils::horizontalAdvanceContainsKeyword(const QString &content, const QString &keyword)
{
    QFont boldFont(qApp->font().family());
    boldFont.setPointSizeF(qApp->font().pointSizeF() + 2);
    boldFont.setWeight(QFont::Bold);
    QFontMetricsF boldMetricsF(boldFont);

    QFont font(qApp->font().family());
    font.setPointSizeF(qApp->font().pointSizeF());
    QFontMetricsF fontMetricsF(font);

    QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content);
    int start = 0;
    qreal contentSize = 0;
    int boldLength = 0;
    int normalLength = 0;

    for (; fm.position() != -1; fm.toNextBoundary()) {
        int end = fm.position();
        if (end == start) {
            continue;
        }

        QString letter = content.mid(start, end - start);
        if (keyword.toUpper().contains(letter.toUpper())) {
            // A bold run starts: account for the normal run that just ended.
            if (normalLength) {
                contentSize += fontMetricsF.horizontalAdvance(content.mid(start - normalLength, normalLength));
                normalLength = 0;
            }
            boldLength += (end - start);
        } else {
            if (boldLength) {
                contentSize += boldMetricsF.horizontalAdvance(content.mid(start - boldLength, boldLength));
                boldLength = 0;
            }
            normalLength += (end - start);
        }
        start = end;
    }

    // Account for the run that is still open at the end of the text.
    if (boldLength) {
        contentSize += boldMetricsF.horizontalAdvance(content.right(boldLength));
    }
    if (normalLength) {
        contentSize += fontMetricsF.horizontalAdvance(content.right(normalLength));
    }
    return contentSize;
}

/**
 * UOF 1.0 parsing
 * Reference specification: GB/T 20916-2007
 * 1. word processing
 * 2. spreadsheet
 * 3. presentation
 * The content of a presentation is stored in the object set; which content
 * is referenced can be read from the anchor attributes below
 * presentation - body - slide set - slide.
 *
 * Target: the text string elements
 */
void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QFile file(path);
    if (!file.open(QIODevice::ReadOnly)) {
        return;
    }
    QDomDocument doc;
    const bool parsed = doc.setContent(&file);
    file.close();
    if (!parsed) {
        return;
    }

    bool isPPT = false;
    QDomElement rootElem = doc.documentElement();
    for (QDomNode node = rootElem.firstChild(); !node.isNull(); node = node.nextSibling()) {
        const QDomElement e = node.toElement();
        if (!e.isNull() && e.tagName() == "uof:演示文稿") {
            isPPT = true;
            break;
        }
    }

    // presentations are handled separately
    if (isPPT) {
        qDebug() << path << "is PPT";
        processUOFPPT(doc, textContent);
        return;
    }

    // word processing and spreadsheet documents are streamed
    if (!file.open(QIODevice::ReadOnly)) {
        return;
    }
    appendElementText(&file, "文本串", textContent);
    file.close();
}

/**
 * UOF 2.0 parsing
 * @brief reference specification https://www.doc88.com/p-9089133923912.html or GJB/Z 165-2012
 * The content of a presentation is stored in graphics.xml; the references in
 * content.xml have to be resolved first (see getUOF2PPTContent()).
 * @param path
 * @param textContent
 */
void FileUtils::getUOF2TextContent(const QString &path, QString &textContent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip file(path);
    if (!file.open(QuaZip::mdUnzip)) {
        return;
    }
    if (!file.setCurrentFile("content.xml")) {
        file.close();
        return;
    }

    QuaZipFile fileR(&file);
    if (fileR.open(QIODevice::ReadOnly)) {
        appendElementText(&fileR, "文本串_415B", textContent);
        fileR.close();
    }
    file.close();
}

/**
 * @brief Extracts the text of a UOF 2.0 presentation: the slides in
 *        content.xml reference graphics by id, the matching graphics in
 *        graphics.xml hold the paragraphs, sentences and text strings.
 */
void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip zipFile(path);
    QDomDocument doc;
    if (!loadZipFileToDoc(zipFile, doc, "content.xml")) {
        return;
    }

    QDomElement rootElem = doc.documentElement();
    QList<QDomElement> nodes;
    QQueue<QString> names; // tag name chain for the next lookup
    names << "演:幻灯片集_6C0E" << "演:幻灯片_6C0F";
    findNodes(rootElem, names, nodes);
    if (nodes.empty()) {
        return;
    }

    QStringList attrs;
    for (const auto &node : nodes) {
        names.clear();
        names << "uof:锚点_C644";
        findNodeAttr(node, names, "图形引用_C62E", attrs);
    }
    if (attrs.empty()) {
        return;
    }

    if (!loadZipFileToDoc(zipFile, doc, "graphics.xml")) {
        return;
    }

    nodes.clear();
    names.clear();
    names << "图:图形_8062";
    rootElem = doc.documentElement();
    findNodesByAttr(rootElem, names, nodes, "标识符_804B", attrs);

    QList<QDomElement> nodes416B; // all paragraph elements (字:段落_416B)
    for (const auto &node : nodes) {
        names.clear();
        names << "图:文本_803C" << "图:内容_8043" << "字:段落_416B";
        findNodes(node, names, nodes416B);
    }

    nodes.clear();
    for (const auto &node : nodes416B) {
        names.clear();
        names << "字:句_419D";
        findNodes(node, names, nodes); // all sentence elements (字:句_419D)
    }

    for (const auto &node : nodes) {
        names.clear();
        names << "字:文本串_415B";
        if (findNodeText(node, names, textContent)) {
            break;
        }
    }
}

/**
 * OFD file parsing
 * @brief reference: GB/T 33190-2016
 * @param path
 * @param textContent
 */
void FileUtils::getOFDTextContent(const QString &path, QString &textContent)
{
    QFileInfo info(path);
    if (!info.exists() || info.isDir()) {
        return;
    }

    QuaZip zipfile(path);
    if (!zipfile.open(QuaZip::mdUnzip)) {
        return;
    }

    // GB/T 33190-2016 allows several Doc_x directories; for now only the
    // content of the first one is read.
    const QString prefix("Doc_0/Pages/");
    int entryCount = 0;
    const QStringList entries = zipfile.getFileNameList();
    for (const QString &entry : entries) {
        if (entry.startsWith(prefix)) {
            ++entryCount;
        }
    }

    for (int i = 0; i < entryCount; ++i) {
        const QString filename = prefix + "Page_" + QString::number(i) + "/Content.xml";
        if (!zipfile.setCurrentFile(filename)) {
            continue;
        }
        QuaZipFile fileR(&zipfile);
        if (!fileR.open(QIODevice::ReadOnly)) {
            continue;
        }
        const bool limitReached = appendElementText(&fileR, "TextCode", textContent);
        fileR.close();
        if (limitReached) {
            break;
        }
    }
    zipfile.close();
}

/**
 * @brief Builds a snippet of at most @p lineCount display lines from the
 *        beginning of @p content, measured with the application font. Lines
 *        are broken at LABEL_MAX_WIDTH or at a line feed; when the last
 *        allowed line is completed the snippet ends with an ellipsis.
 */
QString FileUtils::getSnippetWithoutKeyword(const QString &content, int lineCount)
{
    QString snippet;
    int numOfLine = 0;

    QFont font(qApp->font().family());
    font.setPointSizeF(qApp->font().pointSizeF());
    QFontMetricsF fontMetricsF(font);

    qreal length = 0;   // width of the current line
    int wordCount = 0;  // characters in the current line
    int boundaryStart = 0;
    int boundaryEnd = 0;
    QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content);

    for (; fm.position() != -1; fm.toNextBoundary()) {
        boundaryEnd = fm.position();
        if (boundaryEnd == boundaryStart) {
            continue;
        }
        if (numOfLine == lineCount) {
            break;
        }

        QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart);
        wordCount += boundaryEnd - boundaryStart;
        length = fontMetricsF.horizontalAdvance(content.mid(boundaryEnd - wordCount, wordCount));

        if (length >= LABEL_MAX_WIDTH || word == "\n") {
            if (word == "\n") {
                boundaryStart = boundaryEnd;
            } else if (length > LABEL_MAX_WIDTH) {
                // The grapheme does not fit; revisit it on the next line.
                fm.toPreviousBoundary();
            } else {
                boundaryStart = boundaryEnd;
                snippet.append(word);
            }
            snippet.append("\n");
            numOfLine++;

            if (numOfLine == lineCount) {
                qreal distance = 1; // the last character is always "\n"
                qreal wordSize = 0;
                if (!(word == "\n" && length < LABEL_MAX_WIDTH)) {
                    if (length > LABEL_MAX_WIDTH) {
                        boundaryEnd = boundaryStart;
                    }
                    // Drop trailing graphemes until the ellipsis fits.
                    while (wordSize < fontMetricsF.horizontalAdvance("…")) {
                        boundaryStart = fm.position();
                        wordSize += fontMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart));
                        distance += (boundaryEnd - boundaryStart);
                        boundaryEnd = boundaryStart;
                        fm.toPreviousBoundary();
                    }
                }
                snippet = snippet.left(snippet.size() - distance);
                snippet.append("…");
                break;
            }
            length = 0;
            wordCount = 0;
            continue;
        }

        snippet.append(word);
        boundaryStart = boundaryEnd;
    }
    return snippet;
}