Merge branch '0507-dev' into 'V4-dev'

Add C++ interface.

See merge request kylin-desktop/ukui-search!16
This commit is contained in:
Zhai Kangning 2021-05-07 06:14:12 +00:00
commit d5b968b437
12 changed files with 484 additions and 21 deletions

8
debian/control vendored
View File

@ -42,6 +42,14 @@ Description: Libraries for chinese-segmentation
This package contains a few runtime libraries needed by
libsearch.
Package: libchinese-segmentation-dev
Section: libdevel
Architecture: any
Depends: ${misc:Depends},
${shlibs:Depends},
libchinese-segmentation0 (= ${binary:Version})
Description: Libraries for chinese-segmentation (development files).
Package: libukui-search0
Section: libs
Architecture: any

View File

@ -0,0 +1,2 @@
usr/include/chinese-seg/*
usr/lib/*.so

View File

@ -1,2 +1,2 @@
usr/lib/*.so.*
usr/lib/libchinese-segmentation.so.*
/usr/share/ukui-search/res/dict/*.utf8

View File

@ -1,2 +1,2 @@
usr/include/ukui-search/*
usr/lib/*/*.so
usr/lib/*/libukui-search.so

View File

@ -1 +1 @@
usr/lib/*/*.so.*
usr/lib/*/libukui-search.so.*

View File

@ -39,6 +39,12 @@ unix {
}
!isEmpty(target.path): INSTALLS += target
header.path = /usr/include/chinese-seg/
header.files = *.h
header_cppjieba.path = /usr/include/chinese-seg/cppjieba/
header_cppjieba.files = cppjieba/*
INSTALLS += header header_cppjieba
#DISTFILES += \
# jiaba/jieba.pri

View File

@ -19,7 +19,7 @@
*/
#include "construct-document.h"
#include "file-utils.h"
#include "chinese-segmentation.h"
#include "chinese-seg/chinese-segmentation.h"
#include <QDebug>
#include <QThread>
#include <QUrl>

View File

@ -28,7 +28,7 @@
#include "file-utils.h"
#include "index-generator.h"
#include "global-settings.h"
#include "chinese-segmentation.h"
#include "chinese-seg/chinese-segmentation.h"
#include "construct-document.h"
#include <QStandardPaths>

View File

@ -31,6 +31,33 @@ SearchManager::SearchManager(QObject *parent) : QObject(parent)
m_pool.setExpiryTimeout(1000);
}
void SearchManager::searchLocalFiles(std::string keyword, std::queue<SearchResultInfo> *searchResultFile, std::queue<SearchResultInfo> *searchResultDir, std::queue<SearchResultInfo> *searchResultContent)
{
m_mutex1.lock();
++uniqueSymbol1;
m_mutex1.unlock();
m_mutex2.lock();
++uniqueSymbol2;
m_mutex2.unlock();
m_mutex3.lock();
++uniqueSymbol3;
m_mutex3.unlock();
FileSearchV4 *filesearch;
filesearch = new FileSearchV4(searchResultFile,uniqueSymbol1,QString::fromStdString(keyword),"0",1,0,5);
m_pool.start(filesearch);
FileSearchV4 *dirsearch;
dirsearch = new FileSearchV4(searchResultDir,uniqueSymbol2,QString::fromStdString(keyword),"1",1,0,5);
m_pool.start(dirsearch);
FileContentSearchV4 *contentSearch;
contentSearch = new FileContentSearchV4(searchResultContent,uniqueSymbol3,QString::fromStdString(keyword),0,5);
m_pool.start(contentSearch);
return;
}
// Nothing to release explicitly; members clean up through their own destructors.
SearchManager::~SearchManager() = default;
@ -438,3 +465,361 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord)
// // deleteAllIndex(pathTobeDelete)
return 0;
}
/**
 * Build a file-name search task.
 *
 * @param searchResultFile queue the task pushes matches into (borrowed, never freed here)
 * @param uniqueSymbol     generation value compared against SearchManager's counter before each push
 * @param keyword          text to search for
 * @param value            slot value to filter on; an empty string disables the filter
 * @param slot             Xapian value slot the filter is applied to
 * @param begin            offset of the first match to fetch
 * @param num              number of matches fetched per page
 */
FileSearchV4::FileSearchV4(std::queue<SearchResultInfo> *searchResultFile, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num)
    : m_search_result(searchResultFile),
      m_value(value),
      m_slot(slot),
      m_uniqueSymbol(uniqueSymbol),
      m_keyword(keyword),
      m_begin(begin),
      m_num(num)
{
    // Ask for automatic deletion of the task once it has run.
    setAutoDelete(true);
}
// The result queue is not deleted here; only the pointer to it is cleared.
FileSearchV4::~FileSearchV4()
{
    this->m_search_result = nullptr;
}
/**
 * Task entry point: fetch matches one page (m_num entries) at a time until at
 * least 100 have been counted, a page comes back empty, or a page fails or is
 * cancelled (keywordSearchfile() returns -1).
 */
void FileSearchV4::run()
{
    for (int fetched = 0; fetched < 100; m_begin += m_num) {
        const int pageCount = keywordSearchfile();
        if (pageCount == 0 || pageCount == -1) {
            break;      // nothing more to fetch, or the search was aborted
        }
        fetched += pageCount;
    }
}
/**
 * Run one page of the file-name query against the index at INDEX_PATH.
 *
 * @return number of matches in this page, 0 when the page is empty, or -1 when
 *         getResult() reported -1 or Xapian raised an error.
 */
int FileSearchV4::keywordSearchfile()
{
    try
    {
        qDebug() << "--keywordSearchfile start--";
        Xapian::Database db(INDEX_PATH);
        Xapian::Query query = creatQueryForFileSearch(db);
        Xapian::Enquire enquire(db);

        // Start from the plain phrase query and narrow it to the requested
        // slot value only when a value was supplied.
        Xapian::Query queryFile = query;
        if(!m_value.isEmpty())
        {
            const std::string slotValue = m_value.toStdString();
            // Same lower and upper bound: the range matches exactly that value.
            Xapian::Query queryValue(Xapian::Query::OP_VALUE_RANGE, m_slot, slotValue, slotValue);
            queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue);
        }

        qDebug() << "keywordSearchfile:"<<QString::fromStdString(queryFile.get_description());
        enquire.set_query(queryFile);

        Xapian::MSet result = enquire.get_mset(m_begin, m_num);
        const int resultCount = result.size();
        qDebug()<< "keywordSearchfile results count=" <<resultCount;

        if(resultCount == 0)
        {
            return 0;
        }
        if(getResult(result) == -1)
        {
            return -1;
        }

        qDebug()<< "--keywordSearchfile finish--";
        return resultCount;
    }
    catch(const Xapian::Error &e)
    {
        qWarning() <<QString::fromStdString(e.get_description());
        qDebug()<< "--keywordSearchfile finish--";
        return -1;
    }
}
/**
 * Build the phrase query for the current keyword: the keyword is lower-cased
 * and every character becomes one percent-encoded term, combined with
 * OP_PHRASE.
 *
 * @param db not used by this function
 * @return the phrase query over the per-character terms
 */
Xapian::Query FileSearchV4::creatQueryForFileSearch(Xapian::Database &db)
{
    const QString lowered = m_keyword.toLower();

    std::vector<Xapian::Query> terms;
    for(const QChar &ch : lowered)
    {
        const QByteArray encoded = QUrl::toPercentEncoding(QString(ch));
        terms.push_back(Xapian::Query(encoded.toStdString()));
    }

    return Xapian::Query(Xapian::Query::OP_PHRASE, terms.begin(), terms.end());
}
/**
 * Convert one page of matches into SearchResultInfo entries and push them into
 * the result queue.
 *
 * Blocked paths are skipped, and paths that no longer exist are only logged.
 * Before every push the task checks that the generation it was created with is
 * still the current one; if not, a newer search has started and this one stops.
 *
 * @param result page of matches returned by Xapian
 * @return 0 when the whole page was processed, -1 when the task is stale
 */
int FileSearchV4::getResult(Xapian::MSet &result)
{
    for (auto it = result.begin(); it != result.end(); ++it)
    {
        Xapian::Document doc = it.get_document();
        const QString path = QString::fromStdString(doc.get_data());
        // `auto` avoids the Xapian::weight / Xapian::percent typedefs, which
        // newer Xapian releases deprecate.
        const auto docScoreWeight = it.get_weight();
        const auto docScorePercent = it.get_percent();

        if(SearchManager::isBlocked(path))
            continue;

        QFileInfo info(path);
        if(!info.exists())
        {
            qDebug()<<path<<"is not exist!!";
        }
        else
        {
            SearchResultInfo sSearchResult;
            sSearchResult.fileName = info.fileName().toStdString();
            sSearchResult.filePath = path.toStdString();
            sSearchResult.size = static_cast<size_t>(info.size());
            sSearchResult.type = info.suffix().toStdString();
            sSearchResult.time = info.lastModified().toString("yyyy/MM/dd hh:mm:ss").toStdString();

            // Pair each generation counter with the mutex it is written under
            // in SearchManager::searchLocalFiles (uniqueSymbol1 <-> m_mutex1,
            // uniqueSymbol2 <-> m_mutex2). Reading a counter under the other
            // mutex leaves the staleness check unsynchronised.
            // NOTE(review): consumers draining the queues are assumed to lock
            // the same mutex as the counter — confirm against callers.
            QMutex *guard = nullptr;
            const size_t *current = nullptr;
            switch (m_value.toInt())
            {
            case 1:
                guard = &SearchManager::m_mutex2;
                current = &SearchManager::uniqueSymbol2;
                break;
            case 0:
                guard = &SearchManager::m_mutex1;
                current = &SearchManager::uniqueSymbol1;
                break;
            default:
                break;      // any other value: nothing is pushed
            }

            if(guard != nullptr)
            {
                guard->lock();
                const bool stillCurrent = (m_uniqueSymbol == *current);
                if(stillCurrent)
                    m_search_result->push(sSearchResult);
                guard->unlock();

                if(!stillCurrent)
                    return -1;
            }
        }
        qDebug()<< "doc="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
    }
    return 0;
}
/**
 * Build a file-content search task.
 *
 * @param searchResult queue the task pushes matches into (borrowed, never freed here)
 * @param uniqueSymbol generation value compared against SearchManager::uniqueSymbol3 before each push
 * @param keyword      text to search for
 * @param begin        offset of the first match to fetch
 * @param num          number of matches fetched per page
 */
FileContentSearchV4::FileContentSearchV4(std::queue<SearchResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num)
    : m_search_result(searchResult),
      m_uniqueSymbol(uniqueSymbol),
      m_keyword(keyword),
      m_begin(begin),
      m_num(num)
{
    // Ask for automatic deletion of the task once it has run.
    setAutoDelete(true);
}
// The result queue is not deleted here; only the pointer to it is cleared.
FileContentSearchV4::~FileContentSearchV4()
{
    this->m_search_result = nullptr;
}
/**
 * Task entry point: fetch matches one page (m_num entries) at a time until at
 * least 50 have been counted, a page comes back empty, or a page fails or is
 * cancelled (keywordSearchContent() returns -1).
 */
void FileContentSearchV4::run()
{
    for (int fetched = 0; fetched < 50; m_begin += m_num) {
        const int pageCount = keywordSearchContent();
        if (pageCount == 0 || pageCount == -1) {
            break;      // nothing more to fetch, or the search was aborted
        }
        fetched += pageCount;
    }
}
int FileContentSearchV4::keywordSearchContent()
{
try
{
qDebug()<<"--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
/*
::friso::ResultMap ret;
::friso::FrisoSegmentation::getInstance()->callSegement(ret, keyword.toLocal8Bit().data());
for (::friso::ResultMap::iterator it_map = ret.begin(); it_map != ret.end(); ++it_map){
target_str += it_map->first;
target_str += " ";
it_map->second.first.clear();
::std::vector<size_t>().swap(it_map->second.first);
}
ret.clear();
ret.erase(ret.begin(), ret.end());
::friso::ResultMap().swap(ret);
*/
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
//Creat a query
std::string words;
for(int i=0;i<sKeyWord.size();i++)
{
words.append(sKeyWord.at(i).word).append(" ");
}
Xapian::Query query = qp.parse_query(words);
// Xapian::Query query = qp.parse_query(keyword.toStdString());
// QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
// //Creat a query
// std::string words;
// for(int i=0;i<sKeyWord.size();i++)
// {
// words.append(sKeyWord.at(i).word).append(" ");
// }
// Xapian::Query query = qp.parse_query(words);
// std::vector<Xapian::Query> v;
// for(int i=0;i<sKeyWord.size();i++)
// {
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
// }
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug()<<"keywordSearchContent:"<<QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num);
int resultCount = result.size();
if(result.size() == 0)
return 0;
qDebug()<< "keywordSearchContent results count=" <<resultCount;
if(getResult(result,words) == -1)
return -1;
qDebug()<< "--keywordSearchContent search finish--";
return resultCount;
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
qDebug()<< "--keywordSearchContent search finish--";
return -1;
}
}
/**
 * Convert one page of content matches into SearchResultInfo entries (with up
 * to six text snippets each) and push them into the result queue.
 *
 * Blocked paths and paths that no longer exist are skipped. Before every push
 * the task checks, under m_mutex3, that its generation still equals
 * SearchManager::uniqueSymbol3; if not, a newer search has started and this
 * one stops.
 *
 * @param result  page of matches returned by Xapian
 * @param keyWord space-separated search words; snippets are built around the first one
 * @return 0 when the whole page was processed, -1 when the task is stale
 */
int FileContentSearchV4::getResult(Xapian::MSet &result, std::string &keyWord)
{
    // Loop-invariant: derive the keyword text and its first word once.
    const QString keywordText = QString::fromStdString(keyWord);
    const std::string wordTobeFound = keywordText.section(" ",0,0).toStdString();
    const std::string::size_type wordLength = wordTobeFound.length();

    for (auto it = result.begin(); it != result.end(); ++it)
    {
        Xapian::Document doc = it.get_document();
        const std::string data = doc.get_data();
        const double docScoreWeight = it.get_weight();
        const auto docScorePercent = it.get_percent();
        const QString path = QString::fromStdString(doc.get_value(1));

        if(SearchManager::isBlocked(path))
            continue;

        QFileInfo info(path);
        if(!info.exists())
        {
            qDebug()<<path<<"is not exist!!";
            continue;
        }

        SearchResultInfo sSearchResult;
        sSearchResult.fileName = info.fileName().toStdString();
        sSearchResult.filePath = path.toStdString();
        sSearchResult.size = static_cast<size_t>(info.size());
        sSearchResult.type = info.suffix().toStdString();
        sSearchResult.time = info.lastModified().toString("yyyy/MM/dd hh:mm:ss").toStdString();

        // Construct snippets containing the keyword.
        auto term = doc.termlist_begin();
        term.skip_to(wordTobeFound);
        // skip_to() may run to the end of the term list; asking an end
        // iterator for its position list is invalid, so leave the snippets
        // empty in that case.
        if(term != doc.termlist_end())
        {
            int count = 0;
            for(auto pos = term.positionlist_begin(); pos != term.positionlist_end() && count < 6; ++pos)
            {
                // The stored position is used as an offset into the document
                // data; take up to 60 units of context before it.
                const std::string::size_type start = (*pos < 60) ? 0 : (*pos - 60);
                // substr() throws std::out_of_range past the end of the data,
                // and the caller only catches Xapian::Error. Skip such
                // positions instead of letting the exception escape the task.
                if(start > data.size())
                    continue;

                QString snippet = QString::fromStdString(data.substr(start, wordLength + 120));
                if(snippet.size() > 6 + keywordText.size())
                {
                    // Long enough: overwrite three characters at each edge
                    // with an ellipsis.
                    snippet.replace(0,3,"...");
                    snippet.replace(snippet.size()-3,3,"...");
                }
                else
                {
                    snippet.append("...").prepend("...");
                }
                sSearchResult.snippets.push_back(snippet.toStdString());
                ++count;
            }
        }

        SearchManager::m_mutex3.lock();
        const bool stillCurrent = (m_uniqueSymbol == SearchManager::uniqueSymbol3);
        if(stillCurrent)
            m_search_result->push(sSearchResult);
        SearchManager::m_mutex3.unlock();

        if(!stillCurrent)
            return -1;

        qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
    }
    return 0;
}

View File

@ -36,23 +36,39 @@
#include <QtConcurrent/QtConcurrent>
#include <QThread>
#include <QUrl>
#include <list>
#include <queue>
#include "file-utils.h"
#include "global-settings.h"
#include "chinese-segmentation.h"
#include "../file-utils.h"
#include "../global-settings.h"
#include "chinese-seg/chinese-segmentation.h"
// Path (as std::string) of the index opened by the file-name search:
// <home>/.config/org.ukui/ukui-search/index_data. The expression is evaluated
// every time the macro is used.
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
// Path (as std::string) of the index opened by the file-content search:
// <home>/.config/org.ukui/ukui-search/content_index_data.
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
// One search hit handed to users of the C++ interface.
// Member order is kept as-is so the layout and aggregate initialisation do not change.
struct SearchResultInfo
{
    std::string filePath;               // full path of the matched file
    std::string fileName;               // file name without the directory part
    std::list<std::string> snippets;    // text excerpts; filled by the content search only
    std::string time;                   // last-modified time, formatted "yyyy/MM/dd hh:mm:ss"
    std::string type;                   // file suffix
    size_t size;                        // file size as reported by QFileInfo::size()
};
class SearchManager : public QObject
class LIBSEARCH_EXPORT SearchManager : public QObject
{
friend class FileSearch;
friend class FileContentSearch;
friend class FileSearchV4;
friend class FileContentSearchV4;
Q_OBJECT
public:
explicit SearchManager(QObject *parent = nullptr);
void searchLocalFiles(std::string keyword,
std::queue<SearchResultInfo> *searchResultFile,
std::queue<SearchResultInfo> *searchResultDir,
std::queue<SearchResultInfo> *searchResultContent);
~SearchManager();
static int getCurrentIndexCount();
@ -60,6 +76,7 @@ public:
static size_t uniqueSymbol1;
static size_t uniqueSymbol2;
static size_t uniqueSymbol3;
static QMutex m_mutex1;
static QMutex m_mutex2;
static QMutex m_mutex3;
@ -133,4 +150,42 @@ private:
int m_begin = 0;
int m_num = 20;
};
class FileSearchV4 : public QRunnable
{
public:
explicit FileSearchV4(std::queue<SearchResultInfo> *searchResult,size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
~FileSearchV4();
protected:
void run();
private:
int keywordSearchfile();
Xapian::Query creatQueryForFileSearch(Xapian::Database &db);
int getResult(Xapian::MSet &result);
std::queue<SearchResultInfo> *m_search_result = nullptr;
QString m_value;
unsigned m_slot = 1;
size_t m_uniqueSymbol;
QString m_keyword;
int m_begin = 0;
int m_num = 20;
};
class FileContentSearchV4 : public QRunnable
{
public:
explicit FileContentSearchV4(std::queue<SearchResultInfo> *searchResult,size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
~FileContentSearchV4();
protected:
void run();
private:
int keywordSearchContent();
int getResult(Xapian::MSet &result,std::string &keyWord);
std::queue<SearchResultInfo> *m_search_result = nullptr;
size_t m_uniqueSymbol;
QString m_keyword;
int m_begin = 0;
int m_num = 20;
};
#endif // SEARCHMANAGER_H

View File

@ -31,14 +31,14 @@
#include "index/ukui-search-qdbus.h"
#include "index/inotify-index.h"
class LIBSEARCH_EXPORT GlobalSearch
{
public:
//class LIBSEARCH_EXPORT GlobalSearch
//{
//public:
static QStringList fileSearch(QString keyword, int begin = 0, int num = -1);
// static QStringList fileSearch(QString keyword, int begin = 0, int num = -1);
private:
GlobalSearch();
};
//private:
// GlobalSearch();
//};
#endif // LIBSEARCH_H

View File

@ -59,15 +59,22 @@ unix {
target.path = $$[QT_INSTALL_LIBS]
INSTALLS += target
header.path = /usr/include/ukui-search
header.files += libsearch.h
INSTALLS += header
header.path = /usr/include/ukui-search/
header.files += *.h
header_appsearch.path = /usr/include/ukui-search/appsearch/
header_appsearch.files = appsearch/*.h
header_index.path = /usr/include/ukui-search/index/
header_index.files = index/*.h
header_settingsearch.path = /usr/include/ukui-search/settingsearch/
header_settingsearch.files = settingsearch/*.h
INSTALLS += header header_appsearch header_index header_settingsearch
}
INCLUDEPATH += $$PWD/../libchinese-segmentation
DEPENDPATH += $$PWD/../libchinese-segmentation
#INCLUDEPATH += $$PWD/../libchinese-segmentation
#DEPENDPATH += $$PWD/../libchinese-segmentation
#DISTFILES += \
# ../translations/libsearch/libukui-search_zh_CN.ts