forked from openkylin/ukui-search
整理一些头文件引用,解决打包编译问题
This commit is contained in:
parent
0e7d204f11
commit
b172baca45
|
@ -21,6 +21,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
#include <KWindowSystem>
|
#include <KWindowSystem>
|
||||||
#include "ukui-search-gui.h"
|
#include "ukui-search-gui.h"
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include <QDBusReply>
|
#include <QDBusReply>
|
||||||
|
#include <QDesktopServices>
|
||||||
#include "web-search-view.h"
|
#include "web-search-view.h"
|
||||||
#define MAIN_MARGINS 0,0,0,0
|
#define MAIN_MARGINS 0,0,0,0
|
||||||
#define MAIN_SPACING 0
|
#define MAIN_SPACING 0
|
||||||
|
|
|
@ -28,14 +28,180 @@
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
#include <QDBusInterface>
|
#include <QDBusInterface>
|
||||||
#include <QDBusReply>
|
#include <QDBusReply>
|
||||||
|
#include <QDesktopServices>
|
||||||
|
#include <QMimeDatabase>
|
||||||
|
#include <QCryptographicHash>
|
||||||
|
#include <QFileInfo>
|
||||||
|
#include <QFile>
|
||||||
|
#include <QApplication>
|
||||||
|
#include <QDir>
|
||||||
|
#include <QDebug>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QDomElement>
|
||||||
|
#include <QClipboard>
|
||||||
|
#include <QQueue>
|
||||||
|
#include <QFontMetrics>
|
||||||
|
#include <quazip5/quazipfile.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <quazip5/quazip.h>
|
||||||
|
#include <uchardet/uchardet.h>
|
||||||
|
#include <poppler/qt5/poppler-qt5.h>
|
||||||
#include "gobject-template.h"
|
#include "gobject-template.h"
|
||||||
#include "hanzi-to-pinyin.h"
|
#include "hanzi-to-pinyin.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
using namespace UkuiSearch;
|
using namespace UkuiSearch;
|
||||||
size_t FileUtils::maxIndexCount = 0;
|
|
||||||
unsigned short FileUtils::indexStatus = 0;
|
#define MAX_CONTENT_LENGTH 20480000
|
||||||
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
|
|
||||||
static QMutex iconMutex;
|
static QMutex iconMutex;
|
||||||
|
/**
|
||||||
|
* @brief 查找elem的子节点
|
||||||
|
* @param elem 起始节点
|
||||||
|
* @param names 名称链
|
||||||
|
* @param nodes 查找到的全部结果
|
||||||
|
*/
|
||||||
|
void findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
|
||||||
|
{
|
||||||
|
QString targetName = names.dequeue();
|
||||||
|
QDomNode node = elem.firstChild();
|
||||||
|
while (!node.isNull()) {
|
||||||
|
QDomElement e = node.toElement();
|
||||||
|
if (!e.isNull() && e.tagName() == targetName) {
|
||||||
|
if (names.empty()) {
|
||||||
|
nodes.append(e);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
findNodes(e, names, nodes);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node = node.nextSibling();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void findNodesByAttr(const QDomElement &elem, QQueue <QString> &names, QList <QDomElement> &nodes, const QString &attr, const QStringList &values)
|
||||||
|
{
|
||||||
|
findNodes(elem, names, nodes);
|
||||||
|
|
||||||
|
QList<QDomElement>::iterator it = nodes.begin();
|
||||||
|
while (it != nodes.end()) {
|
||||||
|
if ((*it).hasAttribute(attr) && values.contains((*it).attribute(attr))) {
|
||||||
|
it++;
|
||||||
|
} else {
|
||||||
|
it = nodes.erase(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
|
||||||
|
{
|
||||||
|
QList<QDomElement> nodes;
|
||||||
|
findNodes(elem, names, nodes);
|
||||||
|
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
content.append(node.text());
|
||||||
|
if (content.length() >= MAX_CONTENT_LENGTH / 3) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
|
||||||
|
{
|
||||||
|
QList<QDomElement> nodes;
|
||||||
|
findNodes(elem, names, nodes);
|
||||||
|
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
if (node.hasAttribute(attr)) {
|
||||||
|
attrs.append(node.attribute(attr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void processUOFPPT(const QDomDocument &doc, QString &content)
|
||||||
|
{
|
||||||
|
QDomElement rootElem = doc.documentElement();
|
||||||
|
QList<QDomElement> nodes;
|
||||||
|
QQueue<QString> names; //每个节点的名称
|
||||||
|
names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
|
||||||
|
|
||||||
|
findNodes(rootElem, names, nodes);
|
||||||
|
|
||||||
|
if (nodes.empty()) {
|
||||||
|
//TODO 在uof-ppt不存在锚点节点时,直接查找文本节点?
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList objs;
|
||||||
|
//每一个 演:幻灯片 -> 锚点
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
names.clear();
|
||||||
|
names << "uof:锚点";
|
||||||
|
findNodeAttr(node, names, "uof:图形引用", objs);
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes.clear();
|
||||||
|
names.clear();
|
||||||
|
names << "uof:对象集" << "图:图形";
|
||||||
|
findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
|
||||||
|
|
||||||
|
if (nodes.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<QDomElement> paraNodes; //全部段落节点
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
names.clear();
|
||||||
|
names << "图:文本内容" << "字:段落";
|
||||||
|
findNodes(node, names, paraNodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes.clear();
|
||||||
|
for (const auto &node : paraNodes) {
|
||||||
|
names.clear();
|
||||||
|
names << "字:句";
|
||||||
|
findNodes(node, names, nodes); //全部段落下的全部句节点
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
names.clear();
|
||||||
|
names << "字:文本串";
|
||||||
|
if (findNodeText(node, names, content)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Parse one entry of a zip archive into a QDomDocument.
 *
 * Opens @p zipFile in unzip mode if it is not open yet, selects @p fileName
 * as the current entry, clears @p doc and loads the entry into it.
 *
 * @return true when the entry was opened and parsed; false if the archive
 *         cannot be opened, the entry cannot be selected or opened, or
 *         QDomDocument::setContent() fails.
 */
bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    if (!zipFile.isOpen()) {
        if (!zipFile.open(QuaZip::mdUnzip)) {
            return false;
        }
    }

    if (!zipFile.setCurrentFile(fileName)) {
        return false;
    }

    QuaZipFile entry(&zipFile);
    if (!entry.open(QIODevice::ReadOnly)) {
        return false;
    }

    doc.clear();
    const bool parsed = doc.setContent(&entry);
    // The entry is closed whether or not parsing succeeded.
    entry.close();
    return parsed;
}
|
||||||
|
|
||||||
// Constructor with an empty body; nothing is initialised here.
FileUtils::FileUtils()
{
}
|
||||||
|
@ -198,24 +364,6 @@ bool FileUtils::isOrUnder(QString pathA, QString pathB)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void FileUtils::loadHanziTable(const QString &fileName) {
|
|
||||||
QFile file(fileName);
|
|
||||||
if(!file.open(QFile::ReadOnly | QFile::Text)) {
|
|
||||||
qDebug("File: '%s' open failed!", file.fileName().toStdString().c_str());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 读取汉字对照表文件并转换为QMap存储 */
|
|
||||||
while(!file.atEnd()) {
|
|
||||||
QString content = QString::fromUtf8(file.readLine());
|
|
||||||
FileUtils::map_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ").first().split(",");
|
|
||||||
}
|
|
||||||
file.close();
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QMimeType FileUtils::getMimetype(const QString &path) {
|
QMimeType FileUtils::getMimetype(const QString &path) {
|
||||||
QMimeDatabase mdb;
|
QMimeDatabase mdb;
|
||||||
QMimeType type = mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
|
QMimeType type = mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
|
||||||
|
@ -223,261 +371,8 @@ QMimeType FileUtils::getMimetype(const QString &path) {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
//aborted
|
|
||||||
QString FileUtils::find(const QString &hanzi) {
|
|
||||||
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
|
|
||||||
// static QMap<QString, QStringList> map;
|
|
||||||
QString output;
|
|
||||||
QStringList stringList = hanzi.split("");
|
|
||||||
|
|
||||||
/* 遍历查找汉字-拼音对照表的内容并将汉字替换为拼音 */
|
|
||||||
for(const QString &str : stringList) {
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(str))
|
|
||||||
output += FileUtils::map_chinese2pinyin[str].first();
|
|
||||||
else
|
|
||||||
output += str;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
//DFS多音字太多直接GG
|
|
||||||
void stitchMultiToneWordsDFS(const QString &hanzi, const QString &resultAllPinYin, const QString &resultFirst, QStringList &resultList) {
|
|
||||||
if(hanzi.size() == 0) {
|
|
||||||
resultList.append(resultAllPinYin);
|
|
||||||
resultList.append(resultFirst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(hanzi.at(0))) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]) {
|
|
||||||
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + i, resultFirst + i.at(0), resultList);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + hanzi.at(0), resultFirst + hanzi.at(0), resultList);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//BFS+Stack多音字太多会爆栈
|
|
||||||
void stitchMultiToneWordsBFSStack(const QString &hanzi, QStringList &resultList) {
|
|
||||||
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
||||||
QQueue<QString> tempQueue;
|
|
||||||
tempHanzi = hanzi;
|
|
||||||
int tempQueueSize = 0;
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue.enqueue(i);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tempQueue.enqueue(tempHanzi.at(0));
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
while(tempHanzi.size() != 0) {
|
|
||||||
tempQueueSize = tempQueue.size();
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue.enqueue(tempQueue.head() + i);
|
|
||||||
}
|
|
||||||
tempQueue.dequeue();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
||||||
tempQueue.dequeue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
}
|
|
||||||
while(!tempQueue.empty()) {
|
|
||||||
resultList.append(tempQueue.dequeue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//BFS+Heap,多音字太多会耗尽内存
|
|
||||||
void stitchMultiToneWordsBFSHeap(const QString &hanzi, QStringList &resultList) {
|
|
||||||
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
||||||
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
||||||
tempHanzi = hanzi;
|
|
||||||
int tempQueueSize = 0;
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue->enqueue(i);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tempQueue->enqueue(tempHanzi.at(0));
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
while(tempHanzi.size() != 0) {
|
|
||||||
tempQueueSize = tempQueue->size();
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue->enqueue(tempQueue->head() + i);
|
|
||||||
}
|
|
||||||
tempQueue->dequeue();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
||||||
tempQueue->dequeue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
}
|
|
||||||
while(!tempQueue->empty()) {
|
|
||||||
resultList.append(tempQueue->dequeue());
|
|
||||||
}
|
|
||||||
delete tempQueue;
|
|
||||||
tempQueue = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
//BFS+Heap+超过3个多音字只建一个索引,比较折中的方案
|
|
||||||
void stitchMultiToneWordsBFSHeapLess3(const QString &hanzi, QStringList &resultList) {
|
|
||||||
QString tempHanzi, resultAllPinYin, resultFirst;
|
|
||||||
QQueue<QString>* tempQueue = new QQueue<QString>;
|
|
||||||
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
|
|
||||||
tempHanzi = hanzi;
|
|
||||||
int tempQueueSize = 0;
|
|
||||||
int multiToneWordNum = 0;
|
|
||||||
for(auto i : hanzi) {
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(i)) {
|
|
||||||
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
|
|
||||||
++multiToneWordNum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(multiToneWordNum > 3) {
|
|
||||||
QString oneResult, oneResultFirst;
|
|
||||||
for(auto i : hanzi) {
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(i)) {
|
|
||||||
oneResult += FileUtils::map_chinese2pinyin[i].first();
|
|
||||||
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
|
|
||||||
} else {
|
|
||||||
oneResult += i;
|
|
||||||
oneResultFirst += i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
resultList.append(oneResult);
|
|
||||||
resultList.append(oneResultFirst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue->enqueue(i);
|
|
||||||
tempQueueFirst->enqueue(i.at(0));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tempQueue->enqueue(tempHanzi.at(0));
|
|
||||||
tempQueueFirst->enqueue(tempHanzi.at(0));
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
while(tempHanzi.size() != 0) {
|
|
||||||
tempQueueSize = tempQueue->size();
|
|
||||||
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
|
|
||||||
tempQueue->enqueue(tempQueue->head() + i);
|
|
||||||
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
|
|
||||||
}
|
|
||||||
tempQueue->dequeue();
|
|
||||||
tempQueueFirst->dequeue();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
|
|
||||||
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
|
|
||||||
tempQueue->dequeue();
|
|
||||||
tempQueueFirst->dequeue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
}
|
|
||||||
while(!tempQueue->empty()) {
|
|
||||||
resultList.append(tempQueue->dequeue());
|
|
||||||
resultList.append(tempQueueFirst->dequeue());
|
|
||||||
}
|
|
||||||
delete tempQueue;
|
|
||||||
delete tempQueueFirst;
|
|
||||||
tempQueue = nullptr;
|
|
||||||
tempQueueFirst = nullptr;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
//BFS+Stack+超过3个多音字只建一个索引,比较折中的方案
|
|
||||||
void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &resultList) {
|
|
||||||
QString tempHanzi;
|
|
||||||
QQueue<QString> tempQueue;
|
|
||||||
QQueue<QString> tempQueueFirst;
|
|
||||||
tempHanzi = hanzi;
|
|
||||||
int tempQueueSize = 0;
|
|
||||||
int multiToneWordNum = 0;
|
|
||||||
|
|
||||||
for (auto i:hanzi) {
|
|
||||||
if (HanZiToPinYin::getInstance()->isMultiTone(QString(i).toStdString()))
|
|
||||||
++multiToneWordNum;
|
|
||||||
}
|
|
||||||
if(multiToneWordNum > 3) {
|
|
||||||
QString oneResult, oneResultFirst;
|
|
||||||
for(auto i : hanzi) {
|
|
||||||
QStringList results;
|
|
||||||
HanZiToPinYin::getInstance()->getResults(QString(i).toStdString(), results);
|
|
||||||
if(results.size()) {
|
|
||||||
oneResult += results.first();
|
|
||||||
oneResultFirst += results.first().at(0);
|
|
||||||
} else {
|
|
||||||
oneResult += i;
|
|
||||||
oneResultFirst += i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
resultList.append(oneResult);
|
|
||||||
resultList.append(oneResultFirst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QStringList results;
|
|
||||||
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
|
|
||||||
if(results.size()) {
|
|
||||||
for(auto i : results) {
|
|
||||||
tempQueue.enqueue(i);
|
|
||||||
tempQueueFirst.enqueue(i.at(0));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tempQueue.enqueue(tempHanzi.at(0));
|
|
||||||
tempQueueFirst.enqueue(tempHanzi.at(0));
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
while(tempHanzi.size() != 0) {
|
|
||||||
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
|
|
||||||
tempQueueSize = tempQueue.size();
|
|
||||||
if(results.size()) {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
for(auto i : results) {
|
|
||||||
tempQueue.enqueue(tempQueue.head() + i);
|
|
||||||
tempQueueFirst.enqueue(tempQueueFirst.head() + i.at(0));
|
|
||||||
}
|
|
||||||
tempQueue.dequeue();
|
|
||||||
tempQueueFirst.dequeue();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for(int j = 0; j < tempQueueSize; ++j) {
|
|
||||||
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
|
|
||||||
tempQueueFirst.enqueue(tempQueueFirst.head() + tempHanzi.at(0));
|
|
||||||
tempQueue.dequeue();
|
|
||||||
tempQueueFirst.dequeue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
|
|
||||||
}
|
|
||||||
while(!tempQueue.empty()) {
|
|
||||||
resultList.append(tempQueue.dequeue());
|
|
||||||
resultList.append(tempQueueFirst.dequeue());
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
|
QStringList FileUtils::findMultiToneWords(const QString &hanzi) {
|
||||||
QStringList output, results;
|
QStringList output, results;
|
||||||
//stitchMultiToneWordsBFSStackLess3(hanzi, output);
|
|
||||||
HanZiToPinYin::getInstance()->getResults(hanzi.toStdString(), results);
|
HanZiToPinYin::getInstance()->getResults(hanzi.toStdString(), results);
|
||||||
QString oneResult(results.join(""));
|
QString oneResult(results.join(""));
|
||||||
QString firstLetter;
|
QString firstLetter;
|
||||||
|
@ -1162,124 +1057,9 @@ void FileUtils::getUOFTextContent(const QString &path, QString &textContent)
|
||||||
file.close();
|
file.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileUtils::processUOFPPT(const QDomDocument &doc, QString &content)
|
|
||||||
{
|
|
||||||
QDomElement rootElem = doc.documentElement();
|
|
||||||
QList<QDomElement> nodes;
|
|
||||||
QQueue<QString> names; //每个节点的名称
|
|
||||||
names << "uof:演示文稿" << "演:主体" << "演:幻灯片集" << "演:幻灯片";
|
|
||||||
|
|
||||||
findNodes(rootElem, names, nodes);
|
|
||||||
|
|
||||||
if (nodes.empty()) {
|
|
||||||
//TODO 在uof-ppt不存在锚点节点时,直接查找文本节点?
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QStringList objs;
|
|
||||||
//每一个 演:幻灯片 -> 锚点
|
|
||||||
for (const auto &node : nodes) {
|
|
||||||
names.clear();
|
|
||||||
names << "uof:锚点";
|
|
||||||
findNodeAttr(node, names, "uof:图形引用", objs);
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes.clear();
|
|
||||||
names.clear();
|
|
||||||
names << "uof:对象集" << "图:图形";
|
|
||||||
findNodesByAttr(rootElem, names, nodes, "图:标识符", objs);
|
|
||||||
|
|
||||||
if (nodes.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QList<QDomElement> paraNodes; //全部段落节点
|
|
||||||
for (const auto &node : nodes) {
|
|
||||||
names.clear();
|
|
||||||
names << "图:文本内容" << "字:段落";
|
|
||||||
findNodes(node, names, paraNodes);
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes.clear();
|
|
||||||
for (const auto &node : paraNodes) {
|
|
||||||
names.clear();
|
|
||||||
names << "字:句";
|
|
||||||
findNodes(node, names, nodes); //全部段落下的全部句节点
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto &node : nodes) {
|
|
||||||
names.clear();
|
|
||||||
names << "字:文本串";
|
|
||||||
if (findNodeText(node, names, content)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief 查找elem的子节点
|
|
||||||
* @param elem 起始节点
|
|
||||||
* @param names 名称链
|
|
||||||
* @param nodes 查找到的全部结果
|
|
||||||
*/
|
|
||||||
void FileUtils::findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes)
|
|
||||||
{
|
|
||||||
QString targetName = names.dequeue();
|
|
||||||
QDomNode node = elem.firstChild();
|
|
||||||
while (!node.isNull()) {
|
|
||||||
QDomElement e = node.toElement();
|
|
||||||
if (!e.isNull() && e.tagName() == targetName) {
|
|
||||||
if (names.empty()) {
|
|
||||||
nodes.append(e);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
findNodes(e, names, nodes);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
node = node.nextSibling();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FileUtils::findNodesByAttr(const QDomElement &elem, QQueue <QString> &names, QList <QDomElement> &nodes, const QString &attr, const QStringList &values)
|
|
||||||
{
|
|
||||||
findNodes(elem, names, nodes);
|
|
||||||
|
|
||||||
QList<QDomElement>::iterator it = nodes.begin();
|
|
||||||
while (it != nodes.end()) {
|
|
||||||
if ((*it).hasAttribute(attr) && values.contains((*it).attribute(attr))) {
|
|
||||||
it++;
|
|
||||||
} else {
|
|
||||||
it = nodes.erase(it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool FileUtils::findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content)
|
|
||||||
{
|
|
||||||
QList<QDomElement> nodes;
|
|
||||||
findNodes(elem, names, nodes);
|
|
||||||
|
|
||||||
for (const auto &node : nodes) {
|
|
||||||
content.append(node.text());
|
|
||||||
if (content.length() >= MAX_CONTENT_LENGTH / 3) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FileUtils::findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs)
|
|
||||||
{
|
|
||||||
QList<QDomElement> nodes;
|
|
||||||
findNodes(elem, names, nodes);
|
|
||||||
|
|
||||||
for (const auto &node : nodes) {
|
|
||||||
if (node.hasAttribute(attr)) {
|
|
||||||
attrs.append(node.attribute(attr));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* uof2.0解析
|
* uof2.0解析
|
||||||
|
@ -1388,30 +1168,6 @@ void FileUtils::getUOF2PPTContent(const QString &path, QString &textContent)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * @brief Parse one entry of a zip archive into a QDomDocument.
 *
 * Opens @p zipFile in unzip mode if it is not open yet, selects @p fileName
 * as the current entry, clears @p doc and loads the entry into it.
 *
 * @return true when the entry was opened and parsed; false if the archive
 *         cannot be opened, the entry cannot be selected or opened, or
 *         QDomDocument::setContent() fails.
 */
inline bool FileUtils::loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName)
{
    if (!zipFile.isOpen()) {
        if (!zipFile.open(QuaZip::mdUnzip)) {
            return false;
        }
    }

    if (!zipFile.setCurrentFile(fileName)) {
        return false;
    }

    QuaZipFile entry(&zipFile);
    if (!entry.open(QIODevice::ReadOnly)) {
        return false;
    }

    doc.clear();
    const bool parsed = doc.setContent(&entry);
    // The entry is closed whether or not parsing succeeded.
    entry.close();
    return parsed;
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* OFD文件解析
|
* OFD文件解析
|
||||||
|
|
|
@ -22,44 +22,12 @@
|
||||||
#ifndef FILEUTILS_H
|
#ifndef FILEUTILS_H
|
||||||
#define FILEUTILS_H
|
#define FILEUTILS_H
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QCryptographicHash>
|
|
||||||
#include <QIcon>
|
#include <QIcon>
|
||||||
#include <QMap>
|
#include <QMap>
|
||||||
#include <QMimeDatabase>
|
|
||||||
#include <QMimeType>
|
#include <QMimeType>
|
||||||
#include <QDir>
|
|
||||||
#include <QDebug>
|
|
||||||
#include <QFile>
|
|
||||||
#include <QFileInfo>
|
|
||||||
#include <QUrl>
|
|
||||||
#include <QMap>
|
|
||||||
#include <QQueue>
|
|
||||||
#include <QDesktopServices>
|
|
||||||
#include <QApplication>
|
|
||||||
#include <QClipboard>
|
|
||||||
#include <QFontMetrics>
|
|
||||||
#include <QLabel>
|
#include <QLabel>
|
||||||
|
|
||||||
#include <quazip5/quazipfile.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <quazip5/quazip.h>
|
|
||||||
#include <uchardet/uchardet.h>
|
|
||||||
//#include <poppler-qt5.h>
|
|
||||||
#include <poppler/qt5/poppler-qt5.h>
|
|
||||||
|
|
||||||
#include "libsearch_global.h"
|
#include "libsearch_global.h"
|
||||||
#include "common.h"
|
|
||||||
#include <leptonica/allheaders.h>
|
|
||||||
|
|
||||||
//#define INITIAL_STATE 0
|
|
||||||
//#define CREATING_INDEX 1
|
|
||||||
//#define FINISH_CREATING_INDEX 2
|
|
||||||
#define MAX_CONTENT_LENGTH 20480000
|
|
||||||
namespace UkuiSearch {
|
namespace UkuiSearch {
|
||||||
class LIBSEARCH_EXPORT FileUtils {
|
class LIBSEARCH_EXPORT FileUtils {
|
||||||
public:
|
public:
|
||||||
|
@ -77,12 +45,7 @@ public:
|
||||||
static QString getSettingName(const QString &setting);
|
static QString getSettingName(const QString &setting);
|
||||||
//A is or under B
|
//A is or under B
|
||||||
static bool isOrUnder(QString pathA, QString pathB);
|
static bool isOrUnder(QString pathA, QString pathB);
|
||||||
|
|
||||||
//chinese character to pinyin
|
|
||||||
static QMap<QString, QStringList> map_chinese2pinyin;
|
|
||||||
static QString find(const QString &hanzi);
|
|
||||||
static QStringList findMultiToneWords(const QString &hanzi);
|
static QStringList findMultiToneWords(const QString &hanzi);
|
||||||
static void loadHanziTable(const QString &fileName);
|
|
||||||
|
|
||||||
//parse text,docx.....
|
//parse text,docx.....
|
||||||
static QMimeType getMimetype(const QString &path);
|
static QMimeType getMimetype(const QString &path);
|
||||||
|
@ -110,16 +73,6 @@ public:
|
||||||
*/
|
*/
|
||||||
static bool isEncrypedOrUnsupport(const QString &path, const QString &suffix);
|
static bool isEncrypedOrUnsupport(const QString &path, const QString &suffix);
|
||||||
static bool isOcrSupportSize(QString path);
|
static bool isOcrSupportSize(QString path);
|
||||||
static size_t maxIndexCount;
|
|
||||||
static unsigned short indexStatus;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static void findNodes(const QDomElement &elem, QQueue<QString> &names, QList<QDomElement> &nodes);
|
|
||||||
static inline void findNodesByAttr(const QDomElement&, QQueue<QString>&, QList<QDomElement>&, const QString &, const QStringList&);
|
|
||||||
static inline bool findNodeText(const QDomElement &elem, QQueue<QString> &names, QString &content);
|
|
||||||
static inline void findNodeAttr(const QDomElement &elem, QQueue<QString> &names, const QString &attr, QStringList &attrs);
|
|
||||||
static void processUOFPPT(const QDomDocument &doc, QString &content);
|
|
||||||
static inline bool loadZipFileToDoc(QuaZip &zipFile, QDomDocument &doc, const QString &fileName);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
FileUtils();
|
FileUtils();
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "file-system-watcher-private.h"
|
#include "file-system-watcher-private.h"
|
||||||
#include <sys/inotify.h>
|
#include <sys/inotify.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
#include <unistd.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <QDir>
|
#include <QDir>
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include <QtConcurrent>
|
#include <QtConcurrent>
|
||||||
#include <QPalette>
|
#include <QPalette>
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
|
#define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
|
||||||
|
|
||||||
using namespace UkuiSearch;
|
using namespace UkuiSearch;
|
||||||
static GlobalSettings *globalInstance = nullptr;
|
static GlobalSettings *globalInstance = nullptr;
|
||||||
|
|
|
@ -45,7 +45,6 @@
|
||||||
#define FONT_SIZE_KEY "systemFontSize"
|
#define FONT_SIZE_KEY "systemFontSize"
|
||||||
#define ICON_THEME_KEY "iconThemeName"
|
#define ICON_THEME_KEY "iconThemeName"
|
||||||
|
|
||||||
#define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
|
|
||||||
#define FILE_INDEX_ENABLE_KEY "fileIndexEnable"
|
#define FILE_INDEX_ENABLE_KEY "fileIndexEnable"
|
||||||
#define WEB_ENGINE_KEY "webEngine"
|
#define WEB_ENGINE_KEY "webEngine"
|
||||||
#define CONTENT_FUZZY_SEARCH_KEY "contentFuzzySearch"
|
#define CONTENT_FUZZY_SEARCH_KEY "contentFuzzySearch"
|
||||||
|
|
|
@ -18,9 +18,10 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "basic-indexer.h"
|
#include "basic-indexer.h"
|
||||||
#include "file-utils.h"
|
#include <QDateTime>
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
|
#include "file-utils.h"
|
||||||
using namespace UkuiSearch;
|
using namespace UkuiSearch;
|
||||||
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
|
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
|
||||||
{
|
{
|
||||||
|
|
|
@ -20,7 +20,9 @@
|
||||||
#include "batch-indexer.h"
|
#include "batch-indexer.h"
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QTime>
|
#include <QTime>
|
||||||
|
#include <QDebug>
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
#include <QQueue>
|
||||||
|
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "basic-indexer.h"
|
#include "basic-indexer.h"
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "file-content-indexer.h"
|
#include "file-content-indexer.h"
|
||||||
|
#include <QDateTime>
|
||||||
#include "file-reader.h"
|
#include "file-reader.h"
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "binary-parser.h"
|
#include "binary-parser.h"
|
||||||
#include "ocrobject.h"
|
#include "ocrobject.h"
|
||||||
|
#include "common.h"
|
||||||
using namespace UkuiSearch;
|
using namespace UkuiSearch;
|
||||||
FileReader::FileReader(QObject *parent) : QObject(parent)
|
FileReader::FileReader(QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <QWidget>
|
#include <QWidget>
|
||||||
#include <QDBusReply>
|
#include <QDBusReply>
|
||||||
#include <QDBusArgument>
|
#include <QDBusArgument>
|
||||||
|
#include <QFile>
|
||||||
#include "settings-search-plugin.h"
|
#include "settings-search-plugin.h"
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include <QDBusReply>
|
#include <QDBusReply>
|
||||||
|
#include <QDesktopServices>
|
||||||
#include "web-search-plugin.h"
|
#include "web-search-plugin.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
#define WEB_ENGINE_KEY "webEngine"
|
#define WEB_ENGINE_KEY "webEngine"
|
||||||
|
|
|
@ -44,6 +44,7 @@ SOURCES += \
|
||||||
HEADERS += \
|
HEADERS += \
|
||||||
ukui-search-dir-manager-dbus.h
|
ukui-search-dir-manager-dbus.h
|
||||||
|
|
||||||
LIBS += -L$$OUT_PWD/../libsearch -lukui-search
|
LIBS += -L$$OUT_PWD/../libsearch -lukui-search \
|
||||||
|
-L$$OUT_PWD/../libchinese-segmentation -lchinese-segmentation
|
||||||
INCLUDEPATH += $$PWD/../libsearch
|
INCLUDEPATH += $$PWD/../libsearch
|
||||||
DEPENDPATH += $$PWD/../libsearch
|
DEPENDPATH += $$PWD/../libsearch
|
||||||
|
|
|
@ -24,7 +24,8 @@ include(../libsearch/libukui-search-headers.pri)
|
||||||
include(../3rd-parties/qtsingleapplication/qtsingleapplication.pri)
|
include(../3rd-parties/qtsingleapplication/qtsingleapplication.pri)
|
||||||
include(./qml/qml.pri)
|
include(./qml/qml.pri)
|
||||||
|
|
||||||
LIBS += -L$$OUT_PWD/../libsearch -lukui-search
|
LIBS += -L$$OUT_PWD/../libsearch -lukui-search \
|
||||||
|
-L$$OUT_PWD/../libchinese-segmentation -lchinese-segmentation
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
main.cpp \
|
main.cpp \
|
||||||
|
|
Loading…
Reference in New Issue