Merge pull request #32 from MouseZhangZh/1230-dev

🤡🤡🤡improve the speed of indexing word's pinyin and do not index words wh…
This commit is contained in:
iaom 2020-12-30 16:08:12 +08:00 committed by GitHub
commit 707f823a79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 165 additions and 20 deletions

View File

@ -1,11 +1 @@
INCLUDEPATH += $$PWD INCLUDEPATH += $$PWD
include(index/index.pri)
include(appsearch/appsearch.pri)
include(settingsearch/settingsearch.pri)
HEADERS += \
$$PWD/app-match.h \
SOURCES += \
$$PWD/app-match.cpp \

View File

@ -9,7 +9,7 @@
#include <QDomDocument> #include <QDomDocument>
#include <QMimeDatabase> #include <QMimeDatabase>
#include <QMimeType> #include <QMimeType>
#include <QQueue>
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>(); QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
FileUtils::FileUtils() FileUtils::FileUtils()
@ -202,6 +202,7 @@ QString FileUtils::find(const QString &hanzi)
return output; return output;
} }
// DFS variant: fails outright when the word contains too many polyphonic characters.
void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList){ void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList){
if (hanzi.size() == 0){ if (hanzi.size() == 0){
resultList.append(resultAllPinYin); resultList.append(resultAllPinYin);
@ -218,6 +219,159 @@ void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYi
} }
} }
//BFS+Stack多音字太多会爆栈
void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList){
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString> tempQueue;
tempHanzi = hanzi;
int tempQueueSize = 0;
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue.enqueue(i);
}
}
else{
tempQueue.enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
tempQueueSize = tempQueue.size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue.enqueue(tempQueue.head() + i);
}
tempQueue.dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
tempQueue.dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue.empty()){
resultList.append(tempQueue.dequeue());
}
}
//BFS+Heap多音字太多会耗尽内存
void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList){
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue->enqueue(i);
}
}
else{
tempQueue->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue->enqueue(tempQueue->head() + i);
}
tempQueue->dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueue->dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue->empty()){
resultList.append(tempQueue->dequeue());
}
delete tempQueue;
tempQueue = nullptr;
}
//BFS+Heap+超过3个多音字只建一个索引比较折中的方案
void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList){
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
int multiToneWordNum = 0;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
if (FileUtils::map_chinese2pinyin[i].size() > 1){
++multiToneWordNum;
}
}
}
if (multiToneWordNum > 3){
QString oneResult, oneResultFirst;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
oneResult += FileUtils::map_chinese2pinyin[i].first();
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
}
else{
oneResult += i;
oneResultFirst += i;
}
}
resultList.append(oneResult);
resultList.append(oneResultFirst);
return;
}
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue->enqueue(i);
tempQueueFirst->enqueue(i.at(0));
}
}
else{
tempQueue->enqueue(tempHanzi.at(0));
tempQueueFirst->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
tempQueue->enqueue(tempQueue->head() + i);
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
}
tempQueue->dequeue();
tempQueueFirst->dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
tempQueue->dequeue();
tempQueueFirst->dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue->empty()){
resultList.append(tempQueue->dequeue());
resultList.append(tempQueueFirst->dequeue());
}
delete tempQueue;
delete tempQueueFirst;
tempQueue = nullptr;
tempQueueFirst = nullptr;
return;
}
QStringList FileUtils::findMultiToneWords(const QString& hanzi) QStringList FileUtils::findMultiToneWords(const QString& hanzi)
{ {
// QStringList* output = new QStringList(); // QStringList* output = new QStringList();
@ -225,7 +379,8 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi)
QString tempAllPinYin, tempFirst; QString tempAllPinYin, tempFirst;
QStringList stringList = hanzi.split(""); QStringList stringList = hanzi.split("");
stitchMultiToneWordsDFS(hanzi, tempAllPinYin, tempFirst, output); // stitchMultiToneWordsDFS(hanzi, tempAllPinYin, tempFirst, output);
stitchMultiToneWordsBFSHeapLess3(hanzi, output);
// qDebug() << output; // qDebug() << output;
return output; return output;
} }

View File

@ -45,15 +45,15 @@ extern void qt_blurImage(QImage &blurImage, qreal radius, bool quality, int tran
MainWindow::MainWindow(QWidget *parent) : MainWindow::MainWindow(QWidget *parent) :
QMainWindow(parent) QMainWindow(parent)
{ {
// FileUtils::findMultiToneWords("翟康宁test"); // FileUtils::findMultiToneWords("仇仇仇仇仇仇仇仇仇仇仇翟康宁test");
/*-------------Inotify Test Start---------------*/ /*-------------Inotify Test Start---------------*/
// QTime t1 = QTime::currentTime(); QTime t1 = QTime::currentTime();
// InotifyManagerRefact* im = new InotifyManagerRefact("/home"); InotifyManagerRefact* im = new InotifyManagerRefact("/home");
// im->Traverse(); im->Traverse();
// QTime t2 = QTime::currentTime(); QTime t2 = QTime::currentTime();
// qDebug() << t1; qDebug() << t1;
// qDebug() << t2; qDebug() << t2;
// im->start(); im->start();
/*-------------Inotify Test End-----------------*/ /*-------------Inotify Test End-----------------*/
this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint); this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint);