From 6c510bb02bc8dfd119f0b5291cd01ad3881c1d31 Mon Sep 17 00:00:00 2001 From: zhangzihao Date: Wed, 30 Dec 2020 15:56:23 +0800 Subject: [PATCH] improve the speed of indexing word's pinyin and do not index words which include more than three multitone words --- libsearch/appsearch/appsearch.pri | 10 -- libsearch/file-utils.cpp | 159 +++++++++++++++++++++++++++++- src/mainwindow.cpp | 16 +-- 3 files changed, 165 insertions(+), 20 deletions(-) diff --git a/libsearch/appsearch/appsearch.pri b/libsearch/appsearch/appsearch.pri index f4d55af..aba082d 100644 --- a/libsearch/appsearch/appsearch.pri +++ b/libsearch/appsearch/appsearch.pri @@ -1,11 +1 @@ INCLUDEPATH += $$PWD - -include(index/index.pri) -include(appsearch/appsearch.pri) -include(settingsearch/settingsearch.pri) - -HEADERS += \ - $$PWD/app-match.h \ - -SOURCES += \ - $$PWD/app-match.cpp \ diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 14fcee8..ed0fef7 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include QMap FileUtils::map_chinese2pinyin = QMap(); FileUtils::FileUtils() @@ -202,6 +202,7 @@ QString FileUtils::find(const QString &hanzi) return output; } +//DFS variant; per the original author it breaks down when the word has too many polyphonic characters (not verified here). void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList){ if (hanzi.size() == 0){ resultList.append(resultAllPinYin); @@ -218,6 +219,159 @@ void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYi } } +//BFS with a stack-allocated queue; per the original author, too many polyphonic characters overflow the stack (not verified here). +void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList){ + QString tempHanzi, resultAllPinYin, resultFirst; + QQueue tempQueue; + tempHanzi = hanzi; + int tempQueueSize = 0; + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue.enqueue(i); + } + } + else{ + tempQueue.enqueue(tempHanzi.at(0)); + } + tempHanzi = 
tempHanzi.right(tempHanzi.size() - 1); + while (tempHanzi.size() != 0) { + tempQueueSize = tempQueue.size(); + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (int j = 0; j < tempQueueSize; ++j){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue.enqueue(tempQueue.head() + i); + } + tempQueue.dequeue(); + } + } + else{ + for (int j = 0; j < tempQueueSize; ++j){ + tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0)); + tempQueue.dequeue(); + } + } + tempHanzi = tempHanzi.right(tempHanzi.size() - 1); + } + while(!tempQueue.empty()){ + resultList.append(tempQueue.dequeue()); + } +} +//BFS with a heap-allocated queue; per the original author, too many polyphonic characters exhaust memory (not verified here). +void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList){ + QString tempHanzi, resultAllPinYin, resultFirst; + QQueue* tempQueue = new QQueue; + tempHanzi = hanzi; + int tempQueueSize = 0; + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue->enqueue(i); + } + } + else{ + tempQueue->enqueue(tempHanzi.at(0)); + } + tempHanzi = tempHanzi.right(tempHanzi.size() - 1); + while (tempHanzi.size() != 0) { + tempQueueSize = tempQueue->size(); + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (int j = 0; j < tempQueueSize; ++j){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue->enqueue(tempQueue->head() + i); + } + tempQueue->dequeue(); + } + } + else{ + for (int j = 0; j < tempQueueSize; ++j){ + tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0)); + tempQueue->dequeue(); + } + } + tempHanzi = tempHanzi.right(tempHanzi.size() - 1); + } + while(!tempQueue->empty()){ + resultList.append(tempQueue->dequeue()); + } + delete tempQueue; + tempQueue = nullptr; +} + +//BFS with heap-allocated queues; when more than 3 characters have multiple readings, each mapped character contributes only its first reading, as a compromise. NOTE(review): that early return happens before the delete calls, and at(0) is reached without an empty-input check - confirm and fix. +void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList){ + QString tempHanzi, resultAllPinYin, resultFirst; + QQueue* tempQueue = new QQueue; + QQueue* 
tempQueueFirst = new QQueue; + tempHanzi = hanzi; + int tempQueueSize = 0; + int multiToneWordNum = 0; + for (auto i : hanzi){ + if (FileUtils::map_chinese2pinyin.contains(i)){ + if (FileUtils::map_chinese2pinyin[i].size() > 1){ + ++multiToneWordNum; + } + } + } + if (multiToneWordNum > 3){ + QString oneResult, oneResultFirst; + for (auto i : hanzi){ + if (FileUtils::map_chinese2pinyin.contains(i)){ + oneResult += FileUtils::map_chinese2pinyin[i].first(); + oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0); + } + else{ + oneResult += i; + oneResultFirst += i; + } + } + resultList.append(oneResult); + resultList.append(oneResultFirst); + return; + } + + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue->enqueue(i); + tempQueueFirst->enqueue(i.at(0)); + } + } + else{ + tempQueue->enqueue(tempHanzi.at(0)); + tempQueueFirst->enqueue(tempHanzi.at(0)); + } + tempHanzi = tempHanzi.right(tempHanzi.size() - 1); + while (tempHanzi.size() != 0) { + tempQueueSize = tempQueue->size(); + if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){ + for (int j = 0; j < tempQueueSize; ++j){ + for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){ + tempQueue->enqueue(tempQueue->head() + i); + tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0)); + } + tempQueue->dequeue(); + tempQueueFirst->dequeue(); + } + } + else{ + for (int j = 0; j < tempQueueSize; ++j){ + tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0)); + tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0)); + tempQueue->dequeue(); + tempQueueFirst->dequeue(); + } + } + tempHanzi = tempHanzi.right(tempHanzi.size() - 1); + } + while(!tempQueue->empty()){ + resultList.append(tempQueue->dequeue()); + resultList.append(tempQueueFirst->dequeue()); + } + delete tempQueue; + delete tempQueueFirst; + tempQueue = nullptr; + tempQueueFirst = nullptr; + return; +} + QStringList 
FileUtils::findMultiToneWords(const QString& hanzi) { // QStringList* output = new QStringList(); @@ -225,7 +379,8 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi) QString tempAllPinYin, tempFirst; QStringList stringList = hanzi.split(""); - stitchMultiToneWordsDFS(hanzi, tempAllPinYin, tempFirst, output); +// stitchMultiToneWordsDFS(hanzi, tempAllPinYin, tempFirst, output); + stitchMultiToneWordsBFSHeapLess3(hanzi, output); // qDebug() << output; return output; } diff --git a/src/mainwindow.cpp b/src/mainwindow.cpp index a66fd39..b67de6b 100644 --- a/src/mainwindow.cpp +++ b/src/mainwindow.cpp @@ -45,15 +45,15 @@ extern void qt_blurImage(QImage &blurImage, qreal radius, bool quality, int tran MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent) { -// FileUtils::findMultiToneWords("翟康宁test"); +// FileUtils::findMultiToneWords("仇仇仇仇仇仇仇仇仇仇仇翟康宁test"); /*-------------Inotify Test Start---------------*/ -// QTime t1 = QTime::currentTime(); -// InotifyManagerRefact* im = new InotifyManagerRefact("/home"); -// im->Traverse(); -// QTime t2 = QTime::currentTime(); -// qDebug() << t1; -// qDebug() << t2; -// im->start(); + QTime t1 = QTime::currentTime(); + InotifyManagerRefact* im = new InotifyManagerRefact("/home"); + im->Traverse(); + QTime t2 = QTime::currentTime(); + qDebug() << t1; + qDebug() << t2; + im->start(); /*-------------Inotify Test End-----------------*/ this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint);