From 3be6e80d9b96af784522d6c34cc4d88eb56a7a16 Mon Sep 17 00:00:00 2001 From: zhangpengfei Date: Sat, 26 Dec 2020 08:56:38 +0800 Subject: [PATCH] Add concurrent generate index; Add folder index; Delete index is to be done. --- appsearch/appsearch.pri | 5 ++ file-utils.cpp | 4 +- file-utils.h | 2 +- index/chinesecharacterstopinyin.h | 3 +- index/document.cpp | 67 +++++++++++++++++++ index/document.h | 28 ++++++++ index/index-generator.cpp | 104 ++++++++++++++++++------------ index/index-generator.h | 10 ++- index/index.pri | 13 ++-- index/inotify-manager.cpp | 4 +- index/inotify.cpp | 4 ++ index/messagelist-manager.cpp | 4 +- index/messagelist-manager.h | 2 +- index/text-content-indexer.cpp | 38 +++++++++++ index/text-content-indexer.h | 29 +++++++++ src/main.cpp | 13 ++++ src/mainwindow.cpp | 5 +- ukui-search.pro | 5 +- 18 files changed, 280 insertions(+), 60 deletions(-) create mode 100644 appsearch/appsearch.pri create mode 100644 index/document.cpp create mode 100644 index/document.h create mode 100644 index/text-content-indexer.cpp create mode 100644 index/text-content-indexer.h diff --git a/appsearch/appsearch.pri b/appsearch/appsearch.pri new file mode 100644 index 0000000..224e0af --- /dev/null +++ b/appsearch/appsearch.pri @@ -0,0 +1,5 @@ +INCLUDEPATH += $$PWD + +HEADERS += + +SOURCES += diff --git a/file-utils.cpp b/file-utils.cpp index d358b44..d636cbd 100644 --- a/file-utils.cpp +++ b/file-utils.cpp @@ -7,9 +7,9 @@ FileUtils::FileUtils() { } -std::string FileUtils::makeDocUterm(QString *path) +std::string FileUtils::makeDocUterm(QString path) { - return QCryptographicHash::hash(path->toUtf8(),QCryptographicHash::Md5).toStdString(); + return QCryptographicHash::hash(path.toUtf8(),QCryptographicHash::Md5).toStdString(); } /** diff --git a/file-utils.h b/file-utils.h index 1d1275b..55b7fa4 100644 --- a/file-utils.h +++ b/file-utils.h @@ -8,7 +8,7 @@ class FileUtils { public: - static std::string makeDocUterm(QString *); + static std::string makeDocUterm(QString ); static QIcon getFileIcon(const QString &, bool checkValid = true); static QIcon getAppIcon(const QString &); diff --git a/index/chinesecharacterstopinyin.h b/index/chinesecharacterstopinyin.h index db498f3..7805fed 100644 --- a/index/chinesecharacterstopinyin.h +++ b/index/chinesecharacterstopinyin.h @@ -12,7 +12,8 @@ public: explicit chineseCharactersToPinyin(QObject *parent = nullptr); static QString find(const QString &hanzi) { - static QMap map = loadHanziTable("://index/pinyinWithoutTone.txt"); +// static QMap map = loadHanziTable("://index/pinyinWithoutTone.txt"); + static QMap map; QString output; QStringList stringList = hanzi.split(""); diff --git a/index/document.cpp b/index/document.cpp new file mode 100644 index 0000000..1b3c804 --- /dev/null +++ b/index/document.cpp @@ -0,0 +1,67 @@ +#include "document.h" +#include + +Document::Document() +{ + m_document = new Xapian::Document; +} + +Document::~Document() +{ + if(m_document) + delete m_document; + if(m_index_text) + delete m_index_text; + if(m_unique_term) + delete m_unique_term; +} + +void Document::setData(QString data) +{ + if(data.isEmpty()) + return; + m_document->set_data(data.toStdString()); +} + +void Document::addterm(QString term) +{ + if(term.isEmpty()) + return; + m_document->add_term(term.toStdString()); +} + +void Document::addValue(QString value) +{ + m_document->add_value(1,value.toStdString()); +} + +void Document::setUniqueTerm(QString term) +{ + if(term.isEmpty()) + return; +// m_document->add_term(term.toStdString()); + + m_unique_term = new QString(term); +} + +std::string Document::getUniqueTerm() +{ +// qDebug()<<"m_unique_term!"<<*m_unique_term; + return m_unique_term->toStdString(); +} + +void Document::setIndexText(QStringList indexText) +{ +// QStringList indexTextList = indexText; + m_index_text = new QStringList(indexText); +} + +QStringList Document::getIndexText() +{ + return *m_index_text; +} + +Xapian::Document Document::getXapianDocument() +{ + return *m_document; +} diff --git a/index/document.h b/index/document.h new file mode 100644 index 0000000..b9a6526 --- /dev/null +++ b/index/document.h @@ -0,0 +1,28 @@ +#ifndef DOCUMENT_H +#define DOCUMENT_H + +#include +#include +#include + +class Document +{ +public: + Document(); + ~Document(); + void setData(QString data); + void addterm(QString term); + void addValue(QString value); + void setUniqueTerm(QString term); + std::string getUniqueTerm(); + void setIndexText(QStringList indexText); + QStringList getIndexText(); + Xapian::Document getXapianDocument(); +private: + Xapian::Document *m_document; + QStringList *m_index_text; + QString *m_unique_term; + +}; + +#endif // DOCUMENT_H diff --git a/index/index-generator.cpp b/index/index-generator.cpp index 000fc4e..9aa872d 100644 --- a/index/index-generator.cpp +++ b/index/index-generator.cpp @@ -2,8 +2,11 @@ #include #include #include +#include "file-utils.h" #include "index-generator.h" #include "chinesecharacterstopinyin.h" +#include +#include using namespace std; @@ -23,34 +26,40 @@ bool IndexGenerator::setIndexdataPath() return true; } -bool IndexGenerator::creatAllIndex(QStringList *pathlist) +bool IndexGenerator::creatAllIndex(QList > *messageList) { - HandlePathList(pathlist); + HandlePathList(messageList); try { m_indexer = new Xapian::TermGenerator(); m_indexer->set_database(*m_datebase); + //可以实现拼写纠正 // m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING); m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME); - QMap::const_iterator i; - QStringList *indexStrList; - QString *docStr; - for(i=m_index_map->constBegin();i!=m_index_map->constEnd();++i) -// for(auto i : *m_index_map) + + + int count =0; + for(int i = 0;i < m_doc_list->size(); i++) { - docStr = new QString(i.key()); - indexStrList = new QStringList(i.value()); - insertIntoDatabase(indexStrList,docStr); + insertIntoDatabase(m_doc_list->at(i)); + + if(++count == 9999) + { + count = 0; + m_datebase->commit(); + } } m_datebase->commit(); + + } catch(const Xapian::Error &e) { qDebug()<<"creatAllIndex fail!"<clear(); + m_doc_list->clear(); Q_EMIT this->transactionFinished(); return true; @@ -66,57 +75,70 @@ IndexGenerator::~IndexGenerator() { } -void IndexGenerator::insertIntoDatabase(QStringList *indexTextList,QString *doc) +void IndexGenerator::insertIntoDatabase(Document doc) { qDebug()<< "--index start--"; - m_docstr = doc->toStdString(); - //m_index_text_str = indexTextList->toStdString(); - - std::string uniqueterm = m_cryp->hash(doc->toUtf8(),QCryptographicHash::Md5).toStdString(); - - Xapian::Document document; - document.set_data(m_docstr); - document.add_term(uniqueterm); + Xapian::Document document = doc.getXapianDocument(); m_indexer->set_document(document); + qDebug()<index_text(i.toStdString()); } -// m_indexer->index_text(m_index_text_str); - Xapian::docid innerId= m_datebase->replace_document(uniqueterm,document); - - // qDebug()<<"replace doc uniqueterm="<replace_document(doc.getUniqueTerm(),document); qDebug()<<"replace doc docid="<(innerId); qDebug()<< "--index finish--"; return; - } -void IndexGenerator::HandlePathList(QStringList *pathlist) +void IndexGenerator::HandlePathList(QList> *messageList) { qDebug()<<"Begin HandlePathList!"; - m_index_map = new QMap; - QStringList *list = pathlist; - for(int i = 0;isize();i++) - { - auto info = new QFileInfo(list->at(i)); - //提取文件名并用空格分割,同时去除'.' - QString filename = info->fileName(); - QString index_test = filename.replace(".","").replace("+", "%2B").replace(""," "); - QString pinyin_test = chineseCharactersToPinyin::find(filename.replace(".", "")).replace("", " "); +// qDebug()< future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument); + future.waitForFinished(); -// index_text.simplified(); - // qDebug()<<"index_test"<insert(info->absoluteFilePath(),QStringList() << index_test << pinyin_test); - // qDebug()<value(index_test); - } + QList docList = future.results(); + m_doc_list = new QList(docList); + qDebug()< &list) +{ +// qDebug()< #include #include +#include "document.h" class IndexGenerator : public QObject { @@ -20,17 +21,20 @@ Q_SIGNALS: void transactionFinished(); void searchFinish(); public Q_SLOTS: - bool creatAllIndex(QStringList *pathlist); + bool creatAllIndex(QList> *messageList); bool deleteAllIndex(QStringList *pathlist); private: explicit IndexGenerator(QObject *parent = nullptr); - void HandlePathList(QStringList *pathlist); + void HandlePathList(QList> *messageList); + static Document GenerateDocument(const QVector &list); //add one data in database - void insertIntoDatabase(QStringList *indexText,QString *doc); + void insertIntoDatabase(Document doc); ~IndexGenerator(); QMap *m_index_map; + QList *m_doc_list; + QCryptographicHash *m_cryp; QString *m_index_data_path; Xapian::WritableDatabase *m_datebase; diff --git a/index/index.pri b/index/index.pri index 175a493..8b9adb0 100644 --- a/index/index.pri +++ b/index/index.pri @@ -2,18 +2,23 @@ INCLUDEPATH += $$PWD HEADERS += \ $$PWD/chinesecharacterstopinyin.h \ + $$PWD/document.h \ $$PWD/index-generator.h \ - $$PWD/inotify-manager.h \ \ +# $$PWD/inotify-manager.h \ $$PWD/inotify.h \ $$PWD/messagelist-manager.h \ - $$PWD/traverse_bfs.h + $$PWD/traverse_bfs.h \ + $$PWD/messagelist-manager.h \ + $$PWD/text-content-indexer.h SOURCES += \ $$PWD/chinesecharacterstopinyin.cpp \ + $$PWD/document.cpp \ $$PWD/index-generator.cpp \ - $$PWD/inotify-manager.cpp \ +# $$PWD/inotify-manager.cpp \ $$PWD/inotify.cpp \ $$PWD/messagelist-manager.cpp \ $$PWD/test-Inotify-Manager.cpp \ - $$PWD/traverse_bfs.cpp + $$PWD/traverse_bfs.cpp \ + $$PWD/text-content-indexer.cpp diff --git a/index/inotify-manager.cpp b/index/inotify-manager.cpp index d3e6c5e..909afa5 100644 --- a/index/inotify-manager.cpp +++ b/index/inotify-manager.cpp @@ -80,12 +80,14 @@ bool InotifyManager::AddWatch(const QString &path){ // qDebug() << "m_fd: " <mlm->AddMessage(QVector() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(bool((fileInfo.isDir())))); +// this->mlm->AddMessage(QVector() << "PLog" << "/home/zpf/baidunetdisk/PLog" << "1"); if(fileInfo.isDir()){ this->AddWatch(fileInfo.absoluteFilePath()); } +// else{ +// this->mlm->AddMessage(QVector() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(bool((fileInfo.isDir())))); +// } } bool InotifyManagerRefact::AddWatch(const QString &path){ diff --git a/index/messagelist-manager.cpp b/index/messagelist-manager.cpp index 979cd38..44882bc 100644 --- a/index/messagelist-manager.cpp +++ b/index/messagelist-manager.cpp @@ -6,7 +6,7 @@ //#include MessageListManager::MessageListManager(){ - this->messageList = new QStringList(); + this->messageList = new QList>(); this->ig = IndexGenerator::getInstance(); // indexGeneratorThread = new QThread(); // this->ig->moveToThread(indexGeneratorThread); @@ -54,7 +54,7 @@ bool MessageListManager::SendDeleteMessage(){ return true; } - this->ig->deleteAllIndex(this->messageList); +// this->ig->deleteAllIndex(this->messageList); this->messageList->clear(); return true; } diff --git a/index/messagelist-manager.h b/index/messagelist-manager.h index 909e496..d79310b 100644 --- a/index/messagelist-manager.h +++ b/index/messagelist-manager.h @@ -20,7 +20,7 @@ private: // QStringList* messageList; QList>* messageList; - size_t length = 0; + size_t length = 80000; IndexGenerator* ig; QThread* indexGeneratorThread; diff --git a/index/text-content-indexer.cpp b/index/text-content-indexer.cpp new file mode 100644 index 0000000..b99ad34 --- /dev/null +++ b/index/text-content-indexer.cpp @@ -0,0 +1,38 @@ +#include "text-content-indexer.h" +#include +#include +#include +#include + +TextContentIndexer::TextContentIndexer(QObject *parent) : QObject(parent) +{ + +} + +void TextContentIndexer::creatContentdata() +{ + +} + +void TextContentIndexer::setFileList(QStringList *filelist) +{ + m_file_list = filelist; +} + +void TextContentIndexer::begin() +{ + +} + +bool TextContentIndexer::getPlaintextFileContent(QString path) +{ + QFile file(path); + if(!file.open(QIODevice::ReadOnly)) + return false; + + QTextStream *stream = new QTextStream(&file); + QString content = stream->readAll(); + qDebug()< +#include +#include + +class TextContentIndexer : public QObject +{ + Q_OBJECT +public: + explicit TextContentIndexer(QObject *parent = nullptr); + void setFileList(QStringList *filelist); + void begin(); + bool getPlaintextFileContent(QString path); +Q_SIGNALS: + bool finish(); +private: + void creatContentdata(); + QStringList *m_file_list; + Document *m_current_document; + + +Q_SIGNALS: + +}; + +#endif // TEXTCONTENTINDEXER_H diff --git a/src/main.cpp b/src/main.cpp index eb85521..781c073 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,6 +28,8 @@ #include #include "qt-single-application.h" #include "qt-local-peer.h" +//#include "inotify-manager.h" +#include "inotify.h" void centerToScreen(QWidget* widget) { if (!widget) @@ -43,6 +45,16 @@ void centerToScreen(QWidget* widget) { int main(int argc, char *argv[]) { + /*-------------InotyifyRefact Test Start---------------*/ + QTime t1 = QTime::currentTime(); + InotifyManagerRefact* imr = new InotifyManagerRefact("/home"); + imr->AddWatch("/home"); + imr->setPath("/home"); + imr->Traverse(); + QTime t2 = QTime::currentTime(); + qDebug() << t1; + qDebug() << t2; + /*-------------InotyifyRefact Test End-----------------*/ qRegisterMetaType>("QVector"); QApplication::setAttribute(Qt::AA_EnableHighDpiScaling); @@ -86,5 +98,6 @@ int main(int argc, char *argv[]) w->searchContent(arguments.at(1)); QObject::connect(&app, SIGNAL(messageReceived(const QString&)),w, SLOT(bootOptionsFilter(const QString&))); + return app.exec(); } diff --git a/src/mainwindow.cpp b/src/mainwindow.cpp index 6be9970..fd3a077 100644 --- a/src/mainwindow.cpp +++ b/src/mainwindow.cpp @@ -32,7 +32,8 @@ #include "file-utils.h" #include "index-generator.h" -#include "inotify-manager.h" +//#include "inotify-manager.h" +#include "inotify.h" extern void qt_blurImage(QImage &blurImage, qreal radius, bool quality, int transposed); /** @@ -46,7 +47,7 @@ MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent) { //testBackServe - testTraverse(); + //testTraverse(); this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint); this->setAttribute(Qt::WA_TranslucentBackground, true); diff --git a/ukui-search.pro b/ukui-search.pro index 2e357ba..109f0c7 100644 --- a/ukui-search.pro +++ b/ukui-search.pro @@ -1,4 +1,4 @@ -QT += core gui svg dbus x11extras KWindowSystem xml +QT += core gui svg dbus x11extras KWindowSystem xml concurrent greaterThan(QT_MAJOR_VERSION, 4): QT += widgets @@ -18,7 +18,8 @@ DEFINES += QT_DEPRECATED_WARNINGS include(src/src.pri) include(index/index.pri) include(model/model.pri) -include(control/control.pri)) +include(control/control.pri) +include(appsearch/appsearch.pri) include(singleapplication/qt-single-application.pri) include(settingsmatch/setting-match.pri)