Add files about index creation.

This commit is contained in:
zhangpengfei 2020-12-21 18:50:54 +08:00 committed by Burgess Chang
parent 8eb8e4c834
commit ea7e44cf9a
14 changed files with 653 additions and 7 deletions

1
control/control.pri Normal file
View File

@ -0,0 +1 @@
INCLUDEPATH += $$PWD

10
file-utils.cpp Normal file
View File

@ -0,0 +1,10 @@
#include "file-utils.h"
// The constructor is declared private in file-utils.h; it has nothing to
// initialise because the class carries no state.
FileUtils::FileUtils() = default;
/**
 * Derive a term string from a path.
 *
 * @param path  Path to hash; dereferenced unconditionally, so it must not be null.
 * @return The MD5 digest of the UTF-8 encoded path, copied byte-for-byte
 *         into a std::string.
 */
std::string FileUtils::makeDocUterm(QString *path)
{
    const QByteArray encodedPath = path->toUtf8();
    const QByteArray digest = QCryptographicHash::hash(encodedPath, QCryptographicHash::Md5);
    return digest.toStdString();
}

14
file-utils.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef FILEUTILS_H
#define FILEUTILS_H
#include <QString>
#include <QCryptographicHash>
/**
 * Collection of static helpers. The only constructor is private, so the
 * class cannot be instantiated from outside.
 */
class FileUtils
{
    // Private on purpose: callers use the static members only.
    FileUtils();

public:
    /// Returns the MD5 digest of the UTF-8 encoded @p path as a std::string.
    static std::string makeDocUterm(QString *path);
};
#endif // FILEUTILS_H

192
index/index-generator.cpp Normal file
View File

@ -0,0 +1,192 @@
#include "index-generator.h"
#include <QFile>
#include <QFileInfo>
#include <QDebug>
using namespace std;
#define INDEX_PATH "/home/zpf/.config/org.ukui/index_data"
/**
 * Returns the process-wide IndexGenerator, creating it on first use.
 *
 * The instance lives in a function-local static: C++11 guarantees that the
 * initialisation of a local static runs exactly once even when several
 * threads reach it concurrently. A hand-written "if (!ptr) ptr = new ..."
 * check offers no such guarantee, and this accessor is reached both from
 * InotifyManager::run() (a QThread) and from MessageListManager.
 *
 * If the constructor throws, the static stays uninitialised and the next
 * call tries again. The object is never deleted.
 */
IndexGenerator *IndexGenerator::getInstance()
{
    static IndexGenerator *const instance = new IndexGenerator;
    return instance;
}
/**
 * Index every path in @p pathlist and commit the result.
 *
 * HandlePathList() turns the paths into a freshly allocated
 * (absolute path -> index text) map stored in m_index_map; each entry is
 * then written with CreatIndex() and the batch is committed once.
 *
 * @param pathlist  Paths to index; must not be null.
 * @return true when all entries were written and committed; false when
 *         Xapian raised an error (the error is logged).
 *
 * transactionFinished() is emitted only on success.
 */
bool IndexGenerator::creatAllIndex(QStringList *pathlist)
{
    HandlePathList(pathlist);

    bool succeeded = true;
    try
    {
        // The term generator is only needed for the duration of this call,
        // so it lives on the stack instead of being heap-allocated (and
        // leaked) on every invocation. CreatIndex() reaches it through
        // m_indexer.
        Xapian::TermGenerator indexer;
        indexer.set_database(*m_datebase);
        indexer.set_flags(Xapian::TermGenerator::FLAG_SPELLING);
        indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
        m_indexer = &indexer;

        for (auto it = m_index_map->constBegin(); it != m_index_map->constEnd(); ++it)
        {
            // Map key is the document path, map value is the text to index.
            // Plain locals replace the per-entry heap QStrings that were
            // never freed.
            QString docStr = it.key();
            QString indexStr = it.value();
            CreatIndex(&indexStr, &docStr);
        }
        m_datebase->commit();
    }
    catch(const Xapian::Error &e)
    {
        qDebug()<<"creatAllIndex fail!"<<QString::fromStdString(e.get_description());
        succeeded = false;
    }

    // The local term generator is gone; do not leave a dangling pointer.
    m_indexer = nullptr;

    // HandlePathList() allocates a new map on every call, so release this
    // one here on both the success and the failure path.
    delete m_index_map;
    m_index_map = nullptr;

    if (!succeeded) {
        return false;
    }
    Q_EMIT this->transactionFinished();
    return true;
}
/**
 * Opens (or creates) the Xapian database at INDEX_PATH and allocates the
 * MD5 hasher object.
 *
 * Every pointer member is given a defined value up front; the ones that are
 * assigned later by other member functions start out as nullptr instead of
 * holding an indeterminate value.
 *
 * NOTE(review): INDEX_PATH is a compile-time macro in this file that points
 * into one specific user's home directory — confirm whether it should be
 * derived at run time.
 * NOTE(review): the Xapian::WritableDatabase constructor presumably throws
 * Xapian::Error when the database cannot be opened; nothing here catches it.
 */
IndexGenerator::IndexGenerator(QObject *parent)
    : QObject(parent),
      m_index_map(nullptr),
      m_cryp(nullptr),
      m_index_data_path(nullptr),
      m_datebase(nullptr),
      m_indexer(nullptr)
{
    m_datebase = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
    m_cryp = new QCryptographicHash(QCryptographicHash::Md5);
}
/**
 * Releases the two objects the constructor always allocates.
 *
 * m_indexer and m_index_map are assigned by creatAllIndex() and
 * HandlePathList() and are not guaranteed to hold a valid pointer at this
 * point, so they are deliberately left alone here.
 */
IndexGenerator::~IndexGenerator()
{
    delete m_cryp;
    m_cryp = nullptr;
    delete m_datebase;
    m_datebase = nullptr;
}
/**
 * Write (or overwrite) one document in the database.
 *
 * @param indexText  Text fed to the term generator; must not be null.
 * @param doc        Document path: stored as the document data and hashed
 *                   (MD5 of its UTF-8 bytes) into the unique term used for
 *                   replace_document(); must not be null.
 *
 * Requires m_indexer to point at a configured term generator, which
 * creatAllIndex() sets up before calling this. Nothing is committed here.
 */
void IndexGenerator::CreatIndex(QString *indexText,QString *doc)
{
    qDebug()<< "--index start--";

    m_docstr = doc->toStdString();
    m_index_text_str = indexText->toStdString();
    // hash() is a static member of QCryptographicHash.
    const std::string idTerm =
        QCryptographicHash::hash(doc->toUtf8(), QCryptographicHash::Md5).toStdString();

    Xapian::Document entry;
    entry.set_data(m_docstr);
    entry.add_term(idTerm);

    m_indexer->set_document(entry);
    m_indexer->index_text(m_index_text_str);

    const Xapian::docid storedId = m_datebase->replace_document(idTerm, entry);
    qDebug()<<"replace doc docid="<<static_cast<int>(storedId);

    qDebug()<< "--index finish--";
}
/**
 * Build the (absolute file path -> index text) map for @p pathlist.
 *
 * A new map is allocated on every call and stored in m_index_map; this
 * function does not release a map left over from an earlier call, the
 * consumer (creatAllIndex()) is responsible for it.
 *
 * @param pathlist  Paths to process; must not be null.
 */
void IndexGenerator::HandlePathList(QStringList *pathlist)
{
    qDebug()<<"Begin HandlePathList!";
    m_index_map = new QMap<QString,QString>;

    // Iterate through a const reference so the range-for does not detach
    // the caller's list.
    const QStringList &paths = *pathlist;
    for (const QString &path : paths)
    {
        // Stack object: the previous heap-allocated QFileInfo was never freed.
        const QFileInfo info(path);

        // Take the file name, drop every '.', then put a space between all
        // characters so each character is indexed as its own token.
        QString indexText = info.fileName();
        indexText.replace(".","").replace(""," ");
        // simplified() returns a new string; the result has to be assigned,
        // otherwise the leading/trailing spaces added above are kept.
        indexText = indexText.simplified();

        m_index_map->insert(info.absoluteFilePath(), indexText);
    }
    qDebug()<<"Finish HandlePathList!";
}
bool IndexGenerator::isIndexdataExist()
{
// Xapian::Database db(m_index_data_path->toStdString());
}
/**
 * Run a query against the index and log every hit.
 *
 * The query text is spaced out character by character (the same shape
 * HandlePathList() gives the indexed text) and parsed with OP_NEAR as the
 * default operator. Hits whose stored path no longer exists on disk are
 * collected and removed from the index via deleteAllIndex().
 *
 * @param indexText  Search text; must not be null. It is read only — the
 *                   spacing is applied to a copy, whereas an in-place
 *                   replace() would rewrite the caller's string.
 *
 * searchFinish() is emitted at the end whether or not Xapian raised an error.
 */
void IndexGenerator::IndexSearch(QString *indexText)
{
    try
    {
        qDebug()<<"--search start--";
        Xapian::Enquire enquire(*m_datebase);
        Xapian::QueryParser qp;
        qp.set_default_op(Xapian::Query::OP_NEAR);
        qp.set_database(*m_datebase);

        // Work on a temporary copy so the caller's QString is left untouched.
        const std::string queryStr = QString(*indexText).replace(""," ").toStdString();
        qDebug()<<"queryStr!"<<QString::fromStdString(queryStr);

        Xapian::Query query = qp.parse_query(queryStr);
        qDebug()<<QString::fromStdString(query.get_description());
        enquire.set_query(query);

        Xapian::MSet result = enquire.get_mset(0, 9999);
        qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());

        // Paths found in the index that are gone from disk.
        QStringList pathTobeDelete;
        for (auto it = result.begin(); it != result.end(); ++it) {
            const QString path = QString::fromStdString(it.get_document().get_data());
            // Plain double/int instead of the Xapian::weight / Xapian::percent
            // typedefs, which newer Xapian releases no longer provide.
            const double docScoreWeight = it.get_weight();
            const int docScorePercent = it.get_percent();

            if(!QFileInfo(path).exists())
            {
                pathTobeDelete.append(path);
                qDebug()<<path<<"is not exist!!";
            }
            qDebug()<< "doc="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
        }

        // Delete the documents whose path no longer exists.
        if(!pathTobeDelete.isEmpty())
            deleteAllIndex(&pathTobeDelete);
        qDebug()<< "--search finish--";
    }
    catch(const Xapian::Error &e)
    {
        qDebug() <<QString::fromStdString(e.get_description());
    }
    Q_EMIT this->searchFinish();
}
/**
 * Remove the documents belonging to the given paths from the index.
 *
 * Each path is hashed (MD5 of its UTF-8 bytes) into the same unique term
 * CreatIndex() stores, and the matching document is deleted. All deletions
 * are flushed with a single commit() after the loop rather than one commit
 * per document.
 *
 * @param pathlist  Paths to remove; must not be null.
 * @return true if the list was empty or everything was deleted and
 *         committed; false if Xapian raised an error (logged). In the
 *         failure case nothing is committed by this call.
 *
 * transactionFinished() is emitted only after a successful commit; an empty
 * list returns early without emitting.
 */
bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
{
    if(pathlist->isEmpty())
        return true;

    try
    {
        qDebug()<<"--delete start--";
        for(int i = 0; i < pathlist->size(); ++i)
        {
            // hash() is a static member of QCryptographicHash.
            const std::string uniqueterm =
                QCryptographicHash::hash(pathlist->at(i).toUtf8(), QCryptographicHash::Md5).toStdString();
            m_datebase->delete_document(uniqueterm);
            qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
        }
        m_datebase->commit();
        qDebug()<< "--delete finish--";
    }
    catch(const Xapian::Error &e)
    {
        qDebug() <<QString::fromStdString(e.get_description());
        return false;
    }

    Q_EMIT this->transactionFinished();
    return true;
}

42
index/index-generator.h Normal file
View File

@ -0,0 +1,42 @@
#ifndef INDEXGENERATOR_H
#define INDEXGENERATOR_H
#include <xapian.h>
#include <QObject>
#include <QStringList>
#include <QMultiMap>
#include <QCryptographicHash>
// QObject wrapper around a Xapian::WritableDatabase. It indexes file names
// keyed by file path, searches them, and deletes entries again. Obtained
// through getInstance(); constructor and destructor are private.
class IndexGenerator : public QObject
{
Q_OBJECT
public:
// Returns the shared instance, creating it on the first call.
static IndexGenerator *getInstance();
// NOTE(review): presumably reports whether index data is present — confirm
// against the definition in index-generator.cpp.
bool isIndexdataExist();
Q_SIGNALS:
// Emitted by creatAllIndex() and deleteAllIndex() after a successful commit.
void transactionFinished();
// Emitted at the end of IndexSearch().
void searchFinish();
public Q_SLOTS:
// Indexes every path in the list; returns false if Xapian reported an error.
bool creatAllIndex(QStringList *pathlist);
// Deletes the documents for every path in the list; returns false on a Xapian error.
bool deleteAllIndex(QStringList *pathlist);
// Runs a query for indexText and logs the hits; the result is not returned.
void IndexSearch(QString *indexText);
private:
explicit IndexGenerator(QObject *parent = nullptr);
// Builds m_index_map (absolute file path -> text to index) from pathlist.
void HandlePathList(QStringList *pathlist);
//add one data in database
void CreatIndex(QString *indexText,QString *doc);
~IndexGenerator();
// Allocated by HandlePathList(); consumed by creatAllIndex().
QMap<QString,QString> *m_index_map;
// Allocated in the constructor; used for MD5 hashing of document paths.
QCryptographicHash *m_cryp;
// NOTE(review): not assigned or read anywhere in index-generator.cpp as shown.
QString *m_index_data_path;
// Database handle opened in the constructor.
Xapian::WritableDatabase *m_datebase;
// Scratch copies of the last document path / index text passed to CreatIndex().
std::string m_docstr;
std::string m_index_text_str;
// Term generator set up by creatAllIndex() and used by CreatIndex().
Xapian::TermGenerator *m_indexer;
};
#endif // INDEXGENERATOR_H

13
index/index.pri Normal file
View File

@ -0,0 +1,13 @@
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/index-generator.h \
$$PWD/inotify-manager.h \
$$PWD/messagelistmanager.h \
SOURCES += \
$$PWD/index-generator.cpp \
$$PWD/inotify-manager.cpp \
$$PWD/messagelistmanager.cpp \
$$PWD/testInotifyManager.cpp \

219
index/inotify-manager.cpp Normal file
View File

@ -0,0 +1,219 @@
#include "inotify-manager.h"
#include <cerrno>
#include "index-generator.h"
#include "messagelistmanager.h"
bool InotifyManager::Traverse_BFS(const QString& path, const bool& CREATORDELETE){
qDebug() << "BFS start-----------------------------";
int total = 0;
MessageListManager mlm;
mlm.SetAutoSendMessageLength(80000);
QQueue<QString> bfs;
bfs.enqueue(path);
QFileInfoList list;
QDir dir;
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
while (!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for (auto i : list){
// qDebug() << i.absoluteFilePath();
if (i.isDir()){
AddWatch(i.absoluteFilePath());
bfs.enqueue(i.absoluteFilePath());
}
else{
mlm.AddMessage(i.absoluteFilePath());
total++;
//continue;
}
}
}
mlm.SendMessage();
qDebug()<<total;
qDebug() << "BFS end-----------------------------";
return true;
}
bool InotifyManager::Traverse(const QString& path, const bool& CREATORDELETE){
QDir dir(path);
if (!dir.exists()) {
return false;
}
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
QFileInfoList list = dir.entryInfoList();
int i = 0;
do {
if(list.size()==0){
break;
}
QFileInfo fileInfo = list.at(i);
bool bisDir = fileInfo.isDir();
if (fileInfo.fileName().at(0) != '.'){
if (bisDir) {
// qDebug() << QString("Path: %0/%1")
// .arg(fileInfo.path())
// .arg(fileInfo.fileName());
qDebug() << "inotify-manager traverse: " << fileInfo.filePath();
if (CREATORDELETE){
AddWatch(fileInfo.filePath());
}
Traverse(fileInfo.filePath(), CREATORDELETE);
if (!CREATORDELETE){
RemoveWatch(fileInfo.filePath());
}
} else {
// qDebug() << QString("File: %0/%1")
// .arg(fileInfo.path())
// .arg(fileInfo.fileName());
//IndexGenerator::getInstance()->creatAllIndex(new QStringList(fileInfo.filePath()));
}
}
i++;
} while (i < list.size());
return true;
}
/**
 * Register @p path with the inotify descriptor for move-in, move-out,
 * create and delete events, and remember which watch descriptor belongs to
 * it in currentPath.
 *
 * @return false if inotify_add_watch() failed, true otherwise.
 */
bool InotifyManager::AddWatch(const QString &path){
    const int watchMask = IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE;
    const std::string nativePath = path.toStdString();

    const int wd = inotify_add_watch(m_fd, nativePath.c_str(), watchMask);
    if (wd == -1) {
        return false;
    }

    // Replaces any previous entry stored under the same descriptor.
    currentPath.insert(wd, path);
    return true;
}
// Not used for now.
/**
 * Watch every path in @p paths for all inotify events.
 *
 * The descriptor opened by the constructor is reused; a new one is created
 * only if none is open. Re-initialising unconditionally would leak the
 * existing descriptor and cut off every watch already registered on it.
 * Each watch descriptor is recorded in currentPath so that events can be
 * mapped back to a path, as AddWatch() does.
 *
 * @return false as soon as one path cannot be watched (paths before it stay
 *         watched), true if all were added.
 */
bool InotifyManager::AddWatchList(const QStringList &paths){
    if (m_fd < 0) {
        m_fd = inotify_init();
    }
    qDebug() << "m_fd----------->" <<m_fd;
    for (const QString& p:paths) {
        const int ret = inotify_add_watch(m_fd, p.toStdString().c_str(), IN_ALL_EVENTS);
        if (ret == -1) {
            return false;
        }
        currentPath[ret] = p;
    }
    return true;
}
bool InotifyManager::RemoveWatch(const QString &path){
int ret = inotify_rm_watch(m_fd, currentPath.key(path));
if (ret){
qDebug() << "remove path error";
return false;
}
// qDebug() << "remove path: " << path;
for (QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();){
if (i.value().length() > path.length()){
if (i.value().mid(0, path.length()) == path){
qDebug() << i.value();
/*--------------------------------*/
//在此调用删除索引
IndexGenerator::getInstance()->deleteAllIndex(new QStringList(path));
/*--------------------------------*/
currentPath.erase(i++);
}
else{
i++;
}
}
else{
i++;
}
}
qDebug() << path;
//这个貌似不用删先mark一下
//currentPath.remove(currentPath.key(path));
return true;
}
void InotifyManager::run(){
char * p;
char buf[BUF_LEN] __attribute__((aligned(8)));
ssize_t numRead;
for (;;) { /* Read events forever */
numRead = read(m_fd, buf, BUF_LEN);
if (numRead == 0) {
qDebug() << "read() from inotify fd returned 0!";
}
if (numRead == -1) {
qDebug() << "read";
}
qDebug() << "Read " << numRead << " bytes from inotify fd";
/* Process all of the events in buffer returned by read() */
for (p = buf; p < buf + numRead;) {
struct inotify_event * event = reinterpret_cast<inotify_event *>(p);
if(event->name[0] != '.'){
// if(true){
//这个位运算不要在意,只是懒得把文件夹、文件和事件排列组合了,只是看一下事件的类型
qDebug() << "Read Event: " << num2string[(event->mask & 0x0000ffff)] << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd;
//num2string[event->mask & 0x0000ffff]
IndexGenerator::getInstance()->creatAllIndex(new QStringList(currentPath[event->wd] + event->name));
/*--------------------------------*/
//传创建或移动过来的文件路径
if((event->mask & IN_CREATE) | (event->mask & IN_MOVED_TO)){
//添加监视要先序遍历先添加top节点
if (event->mask & IN_ISDIR){
AddWatch(currentPath[event->wd] + '/' + event->name);
Traverse_BFS(currentPath[event->wd] + '/' + event->name, true);
}
else {
//IndexGenerator::getInstance()->creatAllIndex(new QStringList(currentPath[event->wd] + '/' + event->name));
}
}
else if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)){
if (event->mask & IN_ISDIR){
RemoveWatch(currentPath[event->wd] + '/' + event->name);
}
else {
//这里调用删除索引
IndexGenerator::getInstance()->deleteAllIndex(new QStringList(currentPath[event->wd] + '/' + event->name));
}
}
/*--------------------------------*/
}
p += sizeof(struct inotify_event) + event->len;
}
}
}
/**
 * Opens the inotify descriptor and fills num2string, the table used by
 * run() to print a readable name for an event mask.
 *
 * The result of inotify_init() is stored as-is; it is logged but not checked.
 */
InotifyManager::InotifyManager()
{
    m_fd = inotify_init();
    qDebug() << "m_fd----------->" <<m_fd;

    // (mask value, printable name) pairs, inserted in this order.
    static const struct {
        int mask;
        const char *label;
    } eventNames[] = {
        { IN_ACCESS,        "IN_ACCESS" },
        { IN_MODIFY,        "IN_MODIFY" },
        { IN_ATTRIB,        "IN_ATTRIB" },
        { IN_CLOSE_WRITE,   "IN_CLOSE_WRITE" },
        { IN_CLOSE_NOWRITE, "IN_CLOSE_NOWRITE" },
        { IN_CLOSE,         "IN_CLOSE" },
        { IN_OPEN,          "IN_OPEN" },
        { IN_MOVED_FROM,    "IN_MOVED_FROM" },
        { IN_MOVED_TO,      "IN_MOVED_TO" },
        { IN_MOVE,          "IN_MOVE" },
        { IN_CREATE,        "IN_CREATE" },
        { IN_DELETE,        "IN_DELETE" },
        { IN_DELETE_SELF,   "IN_DELETE_SELF" },
        { IN_MOVE_SELF,     "IN_MOVE_SELF" },
        { IN_UNMOUNT,       "IN_UNMOUNT" },
        { IN_Q_OVERFLOW,    "IN_Q_OVERFLOW" },
        { IN_IGNORED,       "IN_IGNORED" },
    };
    for (const auto &entry : eventNames) {
        num2string.insert(entry.mask, entry.label);
    }
}

40
index/inotify-manager.h Normal file
View File

@ -0,0 +1,40 @@
#ifndef INOTIFYMANAGER_H
#define INOTIFYMANAGER_H
#include <QObject>
#include <QThread>
#include <unistd.h>
#include <sys/inotify.h>
#include <QDebug>
#include <QDir>
#include <QQueue>
//#define EVENT_NUM 12
#define BUF_LEN 1024
// QThread that owns one inotify descriptor. It keeps a map from watch
// descriptor to watched path and, in run(), reads events and forwards file
// creations/deletions to the index generator.
class InotifyManager : public QThread
{
Q_OBJECT
public:
// Opens the inotify descriptor and fills the mask-name table.
explicit InotifyManager();
// Recursive walk that adds or removes watches on sub-directories.
bool Traverse(const QString&, const bool&);//true->create, false->delete
// Breadth-first walk: watches every sub-directory and queues every file for indexing.
bool Traverse_BFS(const QString&, const bool&);
//typedef bool (*AddWatch)(const QString&);
//AddWatch cmp;
// Adds a watch for one path and records it; false if inotify_add_watch() fails.
bool AddWatch(const QString&);
// Adds watches for several paths; false on the first failure.
bool AddWatchList(const QStringList&);
// Removes the watch for a path and forgets recorded watches beneath it.
bool RemoveWatch(const QString&);
protected:
// Event loop executed in the thread after start().
void run() override;
private:
// NOTE(review): not assigned or read anywhere in inotify-manager.cpp as shown.
QString *m_watch_path;
// inotify file descriptor returned by inotify_init().
int m_fd;
// Watch descriptor -> watched path.
QMap<int, QString> currentPath;
// inotify mask value -> printable name, used for logging.
QMap<int, QString> num2string;
};
void testTraverse(void);
#endif // INOTIFYMANAGER_H

View File

@ -0,0 +1,35 @@
#include "messagelistmanager.h"
#include <QDebug>
/**
 * Allocates the pending-path list and looks up the shared IndexGenerator.
 */
MessageListManager::MessageListManager()
    : messageList(new QStringList()),
      ig(IndexGenerator::getInstance())
{
}
/**
 * Frees the pending-path list. The IndexGenerator pointer is only cleared,
 * not deleted.
 */
MessageListManager::~MessageListManager()
{
    delete messageList;
    messageList = nullptr;
    ig = nullptr;
}
void MessageListManager::AddMessage(const QString& path){
this->messageList->append(path);
if (static_cast<size_t>(this->messageList->length()) >= this->length){
this->SendMessage();
}
}
bool MessageListManager::SendMessage(){
if (this->messageList->empty()){
return true;
}
this->ig->creatAllIndex(this->messageList);
this->messageList->clear();
return true;
}
/**
 * Set the queue size at which AddMessage() flushes automatically.
 */
void MessageListManager::SetAutoSendMessageLength(const size_t& length)
{
    // The parameter shadows the member, hence the explicit this->.
    this->length = length;
}

View File

@ -0,0 +1,26 @@
#ifndef MESSAGELISTMANAGER_H
#define MESSAGELISTMANAGER_H
#include <QObject>
#include "index-generator.h"
// Collects file paths and forwards them to the IndexGenerator in batches.
class MessageListManager : public QObject
{
Q_OBJECT
public:
// Allocates the path list and fetches the shared IndexGenerator.
explicit MessageListManager();
// Frees the path list; the IndexGenerator is not deleted.
~MessageListManager();
// Queues one path; flushes when the queue reaches the configured length.
void AddMessage(const QString&);
// Sends all queued paths to IndexGenerator::creatAllIndex() and clears the queue.
bool SendMessage();
// Sets the queue length at which AddMessage() flushes.
void SetAutoSendMessageLength(const size_t&);
private:
// Pending paths; owned by this object.
QStringList* messageList;
// Flush threshold; with the default 0 every AddMessage() call flushes.
size_t length = 0;
// Shared index generator; not owned.
IndexGenerator* ig;
Q_SIGNALS:
};
#endif // MESSAGELISTMANAGER_H

View File

@ -0,0 +1,38 @@
#include "mainwindow.h"
#include "inotify-manager.h"
#include <QTime>
#include <QDebug>
void testTraverse(void){
// QStringList qsl;
// for (int i = 0; i < 4000; i++){
// qsl.append(QString("%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1%1").arg(i));
// }
// for (int i = 0; i < 4000; i++){
// qDebug() << i << " " << qsl.at(i);
// if (qsl.at(i) != QString("%1").arg(i)){
// qDebug() << "fuck stringlist";
// }
// }
// qDebug() << qsl.length();
// qDebug() << "stringlist";
// exit(0);
QTime t1 = QTime::currentTime();
InotifyManager* im = new InotifyManager();
im->AddWatch("/home/zpf");
im->Traverse_BFS("/home/zpf", true);
QTime t2 = QTime::currentTime();
qDebug() << t1;
qDebug() << t2;
im->start();
//exit(0);
}

1
model/model.pri Normal file
View File

@ -0,0 +1 @@
INCLUDEPATH += $$PWD

10
src/src.pri Normal file
View File

@ -0,0 +1,10 @@
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/mainwindow.h \
SOURCES += \
$$PWD/main.cpp \
$$PWD/mainwindow.cpp \

View File

@ -2,7 +2,7 @@ QT += core gui
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
CONFIG += c++11 CONFIG += c++11 no_keywords
# The following define makes your compiler emit warnings if you use # The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings # any Qt feature that has been marked deprecated (the exact warnings
@ -15,14 +15,19 @@ DEFINES += QT_DEPRECATED_WARNINGS
# You can also select to disable deprecated APIs only up to a certain version of Qt. # You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
SOURCES += \ include(src/src.pri)
main.cpp \ include(index/index.pri)
mainwindow.cpp include(model/model.pri)
include(control/control.pri)
HEADERS += \
mainwindow.h
LIBS = -lxapian
# Default rules for deployment. # Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target !isEmpty(target.path): INSTALLS += target
HEADERS += \
file-utils.h
SOURCES += \
file-utils.cpp