Add concurrent index generation;

Add folder index;
Index deletion is still to be done.
This commit is contained in:
zhangpengfei 2020-12-26 08:56:38 +08:00
parent be06116b49
commit 3be6e80d9b
18 changed files with 280 additions and 60 deletions

5
appsearch/appsearch.pri Normal file
View File

@ -0,0 +1,5 @@
INCLUDEPATH += $$PWD
HEADERS +=
SOURCES +=

View File

@ -7,9 +7,9 @@ FileUtils::FileUtils()
{
}
std::string FileUtils::makeDocUterm(QString *path)
std::string FileUtils::makeDocUterm(QString path)
{
return QCryptographicHash::hash(path->toUtf8(),QCryptographicHash::Md5).toStdString();
return QCryptographicHash::hash(path.toUtf8(),QCryptographicHash::Md5).toStdString();
}
/**

View File

@ -8,7 +8,7 @@
class FileUtils
{
public:
static std::string makeDocUterm(QString *);
static std::string makeDocUterm(QString );
static QIcon getFileIcon(const QString &, bool checkValid = true);
static QIcon getAppIcon(const QString &);

View File

@ -12,7 +12,8 @@ public:
explicit chineseCharactersToPinyin(QObject *parent = nullptr);
static QString find(const QString &hanzi)
{
static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
static QMap<QString, QStringList> map;
QString output;
QStringList stringList = hanzi.split("");

67
index/document.cpp Normal file
View File

@ -0,0 +1,67 @@
#include "document.h"
#include <QDebug>
/**
 * Creates an empty document.
 *
 * The wrapped Xapian::Document is allocated here; the index text and the
 * unique term stay null until their setters are called, so every pointer
 * member has a well-defined value from the start.
 */
Document::Document()
    : m_document(new Xapian::Document),
      m_index_text(nullptr),
      m_unique_term(nullptr)
{
}

/**
 * Releases the three heap objects owned by this instance.
 */
Document::~Document()
{
    // Deleting a null pointer is a no-op, so no guards are required.
    delete m_document;
    delete m_index_text;
    delete m_unique_term;
}

/**
 * Stores @p data as the payload of the Xapian document.
 * Empty input is ignored.
 */
void Document::setData(QString data)
{
    if (data.isEmpty())
        return;
    m_document->set_data(data.toStdString());
}

/**
 * Adds @p term to the Xapian document.
 * Empty input is ignored.
 */
void Document::addterm(QString term)
{
    if (term.isEmpty())
        return;
    m_document->add_term(term.toStdString());
}

/**
 * Stores @p value in value slot 1 of the Xapian document.
 */
void Document::addValue(QString value)
{
    m_document->add_value(1, value.toStdString());
}

/**
 * Remembers @p term as the unique term of this document.
 *
 * The term is only kept here; it is not added to the Xapian document.
 * Empty input is ignored. Calling the setter again overwrites the stored
 * value instead of leaking the previous allocation.
 */
void Document::setUniqueTerm(QString term)
{
    if (term.isEmpty())
        return;
    if (m_unique_term)
        *m_unique_term = term;
    else
        m_unique_term = new QString(term);
}

/**
 * Returns the unique term as a std::string, or an empty string when no
 * unique term has been set.
 */
std::string Document::getUniqueTerm()
{
    if (!m_unique_term)
        return std::string();
    return m_unique_term->toStdString();
}

/**
 * Remembers @p indexText as the list of texts to be indexed.
 *
 * Calling the setter again overwrites the stored list instead of leaking
 * the previous allocation.
 */
void Document::setIndexText(QStringList indexText)
{
    if (m_index_text)
        *m_index_text = indexText;
    else
        m_index_text = new QStringList(indexText);
}

/**
 * Returns a copy of the stored index text, or an empty list when none
 * has been set.
 */
QStringList Document::getIndexText()
{
    if (!m_index_text)
        return QStringList();
    return *m_index_text;
}

/**
 * Returns a copy of the wrapped Xapian::Document handle.
 */
Xapian::Document Document::getXapianDocument()
{
    return *m_document;
}

28
index/document.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef DOCUMENT_H
#define DOCUMENT_H
#include <xapian.h>
#include <QString>
#include <QStringList>
class Document
{
public:
Document();
~Document();
void setData(QString data);
void addterm(QString term);
void addValue(QString value);
void setUniqueTerm(QString term);
std::string getUniqueTerm();
void setIndexText(QStringList indexText);
QStringList getIndexText();
Xapian::Document getXapianDocument();
private:
Xapian::Document *m_document;
QStringList *m_index_text;
QString *m_unique_term;
};
#endif // DOCUMENT_H

View File

@ -2,8 +2,11 @@
#include <QStandardPaths>
#include <QFileInfo>
#include <QDebug>
#include "file-utils.h"
#include "index-generator.h"
#include "chinesecharacterstopinyin.h"
#include <QtConcurrent>
#include <QFuture>
using namespace std;
@ -23,34 +26,40 @@ bool IndexGenerator::setIndexdataPath()
return true;
}
bool IndexGenerator::creatAllIndex(QStringList *pathlist)
bool IndexGenerator::creatAllIndex(QList<QVector<QString> > *messageList)
{
HandlePathList(pathlist);
HandlePathList(messageList);
try
{
m_indexer = new Xapian::TermGenerator();
m_indexer->set_database(*m_datebase);
//可以实现拼写纠正
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
m_indexer->set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
QMap<QString, QStringList>::const_iterator i;
QStringList *indexStrList;
QString *docStr;
for(i=m_index_map->constBegin();i!=m_index_map->constEnd();++i)
// for(auto i : *m_index_map)
int count =0;
for(int i = 0;i < m_doc_list->size(); i++)
{
docStr = new QString(i.key());
indexStrList = new QStringList(i.value());
insertIntoDatabase(indexStrList,docStr);
insertIntoDatabase(m_doc_list->at(i));
if(++count == 9999)
{
count = 0;
m_datebase->commit();
}
}
m_datebase->commit();
}
catch(const Xapian::Error &e)
{
qDebug()<<"creatAllIndex fail!"<<QString::fromStdString(e.get_description());
return false;
}
m_index_map->clear();
m_doc_list->clear();
Q_EMIT this->transactionFinished();
return true;
@ -66,57 +75,70 @@ IndexGenerator::~IndexGenerator()
{
}
void IndexGenerator::insertIntoDatabase(QStringList *indexTextList,QString *doc)
void IndexGenerator::insertIntoDatabase(Document doc)
{
qDebug()<< "--index start--";
m_docstr = doc->toStdString();
//m_index_text_str = indexTextList->toStdString();
std::string uniqueterm = m_cryp->hash(doc->toUtf8(),QCryptographicHash::Md5).toStdString();
Xapian::Document document;
document.set_data(m_docstr);
document.add_term(uniqueterm);
Xapian::Document document = doc.getXapianDocument();
m_indexer->set_document(document);
qDebug()<<doc.getIndexText();
for(auto i : *indexTextList){
for(auto i : doc.getIndexText()){
m_indexer->index_text(i.toStdString());
}
// m_indexer->index_text(m_index_text_str);
Xapian::docid innerId= m_datebase->replace_document(uniqueterm,document);
// qDebug()<<"replace doc uniqueterm="<<QString::fromStdString(uniqueterm);
Xapian::docid innerId= m_datebase->replace_document(doc.getUniqueTerm(),document);
qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
qDebug()<< "--index finish--";
return;
}
void IndexGenerator::HandlePathList(QStringList *pathlist)
void IndexGenerator::HandlePathList(QList<QVector<QString>> *messageList)
{
qDebug()<<"Begin HandlePathList!";
m_index_map = new QMap<QString,QStringList>;
QStringList *list = pathlist;
for(int i = 0;i<list->size();i++)
{
auto info = new QFileInfo(list->at(i));
//提取文件名并用空格分割,同时去除'.'
QString filename = info->fileName();
QString index_test = filename.replace(".","").replace("+", "%2B").replace(""," ");
QString pinyin_test = chineseCharactersToPinyin::find(filename.replace(".", "")).replace("", " ");
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
future.waitForFinished();
// index_text.simplified();
// qDebug()<<"index_test"<<index_test;
m_index_map->insert(info->absoluteFilePath(),QStringList() << index_test << pinyin_test);
// qDebug()<<m_index_map->value(index_test);
}
QList<Document> docList = future.results();
m_doc_list = new QList<Document>(docList);
qDebug()<<m_doc_list;
qDebug()<<"Finish HandlePathList!";
return;
}
/**
 * Builds the Document for one message entry.
 *
 * @param list  entry fields: [0] file name, [1] file path name,
 *              [2] file-or-directory flag.
 * @return a Document carrying the path as data, a unique term derived
 *         from the path, the flag as a value and the file name as index text.
 */
Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
{
    QString nameText = list.at(0);
    const QString filePath = list.at(1);

    // Drop every '.', put a space between all characters, then collapse
    // the surplus whitespace.
    nameText.replace(".", "");
    nameText.replace("", " ");
    nameText = nameText.simplified();

    // Pinyin index text is currently disabled; only the file name is indexed.

    // WARNING (zpf): implicit conversions between QByteArray and QString can
    // truncate the string or cause other unexpected results.
    // NOTE(review): the term below still travels std::string -> QString ->
    // std::string; presumably lossy for raw hash bytes - confirm.
    const std::string termBytes = FileUtils::makeDocUterm(filePath);
    const QString uniqueterm = QString::fromStdString(termBytes);

    Document result;
    result.setData(filePath);
    result.setUniqueTerm(uniqueterm);
    result.addValue(list.at(2));
    result.setIndexText(QStringList(nameText));
    return result;
}
bool IndexGenerator::isIndexdataExist()
{

View File

@ -7,6 +7,7 @@
#include <QStringList>
#include <QMap>
#include <QCryptographicHash>
#include "document.h"
class IndexGenerator : public QObject
{
@ -20,17 +21,20 @@ Q_SIGNALS:
void transactionFinished();
void searchFinish();
public Q_SLOTS:
bool creatAllIndex(QStringList *pathlist);
bool creatAllIndex(QList<QVector<QString>> *messageList);
bool deleteAllIndex(QStringList *pathlist);
private:
explicit IndexGenerator(QObject *parent = nullptr);
void HandlePathList(QStringList *pathlist);
void HandlePathList(QList<QVector<QString>> *messageList);
static Document GenerateDocument(const QVector<QString> &list);
//add one data in database
void insertIntoDatabase(QStringList *indexText,QString *doc);
void insertIntoDatabase(Document doc);
~IndexGenerator();
QMap<QString,QStringList> *m_index_map;
QList<Document> *m_doc_list;
QCryptographicHash *m_cryp;
QString *m_index_data_path;
Xapian::WritableDatabase *m_datebase;

View File

@ -2,18 +2,23 @@ INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/chinesecharacterstopinyin.h \
$$PWD/document.h \
$$PWD/index-generator.h \
$$PWD/inotify-manager.h \ \
# $$PWD/inotify-manager.h \
$$PWD/inotify.h \
$$PWD/messagelist-manager.h \
$$PWD/traverse_bfs.h
$$PWD/traverse_bfs.h \
$$PWD/messagelist-manager.h \
$$PWD/text-content-indexer.h
SOURCES += \
$$PWD/chinesecharacterstopinyin.cpp \
$$PWD/document.cpp \
$$PWD/index-generator.cpp \
$$PWD/inotify-manager.cpp \
# $$PWD/inotify-manager.cpp \
$$PWD/inotify.cpp \
$$PWD/messagelist-manager.cpp \
$$PWD/test-Inotify-Manager.cpp \
$$PWD/traverse_bfs.cpp
$$PWD/traverse_bfs.cpp \
$$PWD/text-content-indexer.cpp

View File

@ -80,12 +80,14 @@ bool InotifyManager::AddWatch(const QString &path){
// qDebug() << "m_fd: " <<m_fd;
//int ret = inotify_add_watch(m_fd, path.toStdString().c_str(), IN_ALL_EVENTS);
int ret = inotify_add_watch(m_fd, path.toStdString().c_str(), (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE));
Q_ASSERT(ret!=-1);
if (ret == -1) {
qDebug() << "AddWatch error:" << path;
return false;
}
currentPath[ret] = path;
// qDebug() << "Watch:" << path;
qDebug() << "ret: " <<ret;
qDebug() << "Watch:" << path;
return true;
}

View File

@ -36,9 +36,13 @@ InotifyManagerRefact::~InotifyManagerRefact(){
void InotifyManagerRefact::DoSomething(const QFileInfo& fileInfo){
this->mlm->AddMessage(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(bool((fileInfo.isDir()))));
// this->mlm->AddMessage(QVector<QString>() << "PLog" << "/home/zpf/baidunetdisk/PLog" << "1");
if(fileInfo.isDir()){
this->AddWatch(fileInfo.absoluteFilePath());
}
// else{
// this->mlm->AddMessage(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(bool((fileInfo.isDir()))));
// }
}
bool InotifyManagerRefact::AddWatch(const QString &path){

View File

@ -6,7 +6,7 @@
//#include <unistd.h>
MessageListManager::MessageListManager(){
this->messageList = new QStringList();
this->messageList = new QList<QVector<QString>>();
this->ig = IndexGenerator::getInstance();
// indexGeneratorThread = new QThread();
// this->ig->moveToThread(indexGeneratorThread);
@ -54,7 +54,7 @@ bool MessageListManager::SendDeleteMessage(){
return true;
}
this->ig->deleteAllIndex(this->messageList);
// this->ig->deleteAllIndex(this->messageList);
this->messageList->clear();
return true;
}

View File

@ -20,7 +20,7 @@ private:
// QStringList* messageList;
QList<QVector<QString>>* messageList;
size_t length = 0;
size_t length = 80000;
IndexGenerator* ig;
QThread* indexGeneratorThread;

View File

@ -0,0 +1,38 @@
#include "text-content-indexer.h"
#include <QFile>
#include <QFileInfo>
#include <QTextStream>
#include <QDebug>
/**
 * Constructs the indexer and passes @p parent on to QObject.
 */
TextContentIndexer::TextContentIndexer(QObject *parent) : QObject(parent)
{
}
/**
 * Placeholder: the body is empty, so calling it currently does nothing.
 */
void TextContentIndexer::creatContentdata()
{
}
/**
 * Stores the given file list pointer for later use.
 * Only the pointer is kept; the list itself is not copied.
 */
void TextContentIndexer::setFileList(QStringList *filelist)
{
    this->m_file_list = filelist;
}
/**
 * Placeholder: the body is empty, so calling it currently does nothing.
 */
void TextContentIndexer::begin()
{
}
/**
 * Reads the whole file at @p path as text and prints it to the debug output.
 *
 * @param path  file to read.
 * @return false when the file cannot be opened for reading, true otherwise.
 */
bool TextContentIndexer::getPlaintextFileContent(QString path)
{
    QFile file(path);
    if (!file.open(QIODevice::ReadOnly))
        return false;

    // The stream lives on the stack so it is released on return; the
    // previous heap allocation was never deleted.
    QTextStream stream(&file);
    const QString content = stream.readAll();
    qDebug()<<content;
    return true;
}

View File

@ -0,0 +1,29 @@
#ifndef TEXTCONTENTINDEXER_H
#define TEXTCONTENTINDEXER_H
#include "document.h"
#include <QObject>
#include <QStringList>
#include <QString>
/**
 * QObject-based helper for indexing the text content of files.
 *
 * It keeps a pointer to a caller-supplied file list and can read a plain
 * text file.
 */
class TextContentIndexer : public QObject
{
    Q_OBJECT
public:
    explicit TextContentIndexer(QObject *parent = nullptr);
    void setFileList(QStringList *filelist);
    void begin();
    bool getPlaintextFileContent(QString path);
Q_SIGNALS:
    // NOTE(review): signals conventionally return void - confirm whether
    // the bool return type is intended.
    bool finish();
private:
    void creatContentdata();
    // Both pointers start out null so they never hold an indeterminate value.
    QStringList *m_file_list = nullptr;
    Document *m_current_document = nullptr;
};
#endif // TEXTCONTENTINDEXER_H

View File

@ -28,6 +28,8 @@
#include <QObject>
#include "qt-single-application.h"
#include "qt-local-peer.h"
//#include "inotify-manager.h"
#include "inotify.h"
void centerToScreen(QWidget* widget) {
if (!widget)
@ -43,6 +45,16 @@ void centerToScreen(QWidget* widget) {
int main(int argc, char *argv[])
{
/*-------------InotyifyRefact Test Start---------------*/
QTime t1 = QTime::currentTime();
InotifyManagerRefact* imr = new InotifyManagerRefact("/home");
imr->AddWatch("/home");
imr->setPath("/home");
imr->Traverse();
QTime t2 = QTime::currentTime();
qDebug() << t1;
qDebug() << t2;
/*-------------InotyifyRefact Test End-----------------*/
qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");
QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
@ -86,5 +98,6 @@ int main(int argc, char *argv[])
w->searchContent(arguments.at(1));
QObject::connect(&app, SIGNAL(messageReceived(const QString&)),w, SLOT(bootOptionsFilter(const QString&)));
return app.exec();
}

View File

@ -32,7 +32,8 @@
#include "file-utils.h"
#include "index-generator.h"
#include "inotify-manager.h"
//#include "inotify-manager.h"
#include "inotify.h"
extern void qt_blurImage(QImage &blurImage, qreal radius, bool quality, int transposed);
/**
@ -46,7 +47,7 @@ MainWindow::MainWindow(QWidget *parent) :
QMainWindow(parent)
{
//testBackServe
testTraverse();
//testTraverse();
this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint);
this->setAttribute(Qt::WA_TranslucentBackground, true);

View File

@ -1,4 +1,4 @@
QT += core gui svg dbus x11extras KWindowSystem xml
QT += core gui svg dbus x11extras KWindowSystem xml concurrent
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
@ -18,7 +18,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
include(src/src.pri)
include(index/index.pri)
include(model/model.pri)
include(control/control.pri))
include(control/control.pri)
include(appsearch/appsearch.pri)
include(singleapplication/qt-single-application.pri)
include(settingsmatch/setting-match.pri)