Merge branch '0720-newfrontend' into 'new-fronted'

0720 update.

See merge request kylin-desktop/ukui-search!65
This commit is contained in:
PengfeiZhang 2021-07-20 06:17:02 +00:00
commit 200a6059c9
133 changed files with 7995 additions and 3765 deletions

1
.gitignore vendored
View File

@ -14,7 +14,6 @@
*.so.* *.so.*
*_pch.h.cpp *_pch.h.cpp
*_resource.rc *_resource.rc
*.qm
.#* .#*
*.*# *.*#
core core

View File

@ -1,4 +1,17 @@
# ukui-search # ukui-search
[WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment. [WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment.
Build from source
git clone https://github.com/ukui/ukui-search.git
cd ukui-search && mkdir build && cd build
qmake .. && make
sudo make install
/usr/bin/ukui-search

View File

@ -9,6 +9,7 @@ Exec=/usr/bin/ukui-search -s
Type=Application Type=Application
Icon=kylin-search Icon=kylin-search
X-UKUI-AutoRestart=true X-UKUI-AutoRestart=true
NoDisplay=true
OnlyShowIn=UKUI OnlyShowIn=UKUI
X-UKUI-Autostart-Phase=Application X-UKUI-Autostart-Phase=Application
Terminal=false Terminal=false

7
debian/changelog vendored
View File

@ -1,3 +1,10 @@
ukui-search (0.4.0+0530) v101; urgency=medium
* Bug 57129
* 任务 无
-- zhangpengfei <zhangpengfei@kylinos.cn> Sun, 30 May 2021 11:21:37 +0800
ukui-search (0.4.0+0520) v101; urgency=medium ukui-search (0.4.0+0520) v101; urgency=medium
* Bug 55034,55545,55326,55496 * Bug 55034,55545,55326,55496

View File

@ -130,10 +130,10 @@ void HomePage::createSection(const QString &section_name, const HomePageItemShap
this->appendSection(section); this->appendSection(section);
connect(section, &HomePageSection::requestAction, this, [ = ](const QString &key, const QString &action, const QString &pluginId) { connect(section, &HomePageSection::requestAction, this, [ = ](const QString &key, const QString &action, const QString &pluginId) {
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(pluginId); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(pluginId);
if (plugin) { // if (plugin) {
plugin->openAction(action, key); // plugin->openAction(action, key);
} else { // } else {
qWarning()<<"Get plugin failed!"; // qWarning()<<"Get plugin failed!";
} // }
}); });
} }

View File

@ -27,11 +27,11 @@ using namespace Zeeker;
#define DETAIL_BACKGROUND_COLOR QColor(0, 0, 0, 0) #define DETAIL_BACKGROUND_COLOR QColor(0, 0, 0, 0)
#define DETAIL_WIDGET_TRANSPARENT 0.04 #define DETAIL_WIDGET_TRANSPARENT 0.04
#define DETAIL_WIDGET_BORDER_RADIUS 4 #define DETAIL_WIDGET_BORDER_RADIUS 4
#define DETAIL_WIDGET_MARGINS 8,40,40,8 #define DETAIL_WIDGET_MARGINS 8,0,8,0
#define DETAIL_FRAME_MARGINS 8,0,8,0 #define DETAIL_FRAME_MARGINS 8,0,0,0
#define DETAIL_ICON_HEIGHT 120 #define DETAIL_ICON_HEIGHT 120
#define NAME_LABEL_WIDTH 280 #define NAME_LABEL_WIDTH 280
#define ICON_SIZE QSize(96, 96) #define ICON_SIZE QSize(120, 120)
#define LINE_STYLE "QFrame{background: rgba(0,0,0,0.2);}" #define LINE_STYLE "QFrame{background: rgba(0,0,0,0.2);}"
#define ACTION_NORMAL_COLOR QColor(55, 144, 250, 255) #define ACTION_NORMAL_COLOR QColor(55, 144, 250, 255)
#define ACTION_HOVER_COLOR QColor(64, 169, 251, 255) #define ACTION_HOVER_COLOR QColor(64, 169, 251, 255)
@ -138,8 +138,18 @@ QString escapeHtml(const QString & str) {
void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginIface::ResultInfo &info) void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginIface::ResultInfo &info)
{ {
m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE))); clearLayout(m_descFrameLyt);
m_iconLabel->show(); clearLayout(m_previewFrameLyt);
if(SearchPluginManager::getInstance()->getPlugin(plugin_name)->isPreviewEnable(info.actionKey,info.type)) {
m_iconLabel->hide();
m_previewFrameLyt->addWidget(SearchPluginManager::getInstance()->getPlugin(plugin_name)->previewPage(info.actionKey,info.type, m_previewFrame), 0 , Qt::AlignHCenter);
m_previewFrameLyt->setContentsMargins(0,0,0,0);
m_previewFrame->show();
} else {
m_previewFrame->hide();
m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE)));
m_iconLabel->show();
}
QFontMetrics fontMetrics = m_nameLabel->fontMetrics(); QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
QString name = fontMetrics.elidedText(info.name, Qt::ElideRight, NAME_LABEL_WIDTH - 8); QString name = fontMetrics.elidedText(info.name, Qt::ElideRight, NAME_LABEL_WIDTH - 8);
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(name))); m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(name)));
@ -147,6 +157,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
m_pluginLabel->setText(plugin_name); m_pluginLabel->setText(plugin_name);
m_nameFrame->show(); m_nameFrame->show();
m_line_1->show(); m_line_1->show();
if (info.description.length() > 0) { if (info.description.length() > 0) {
//NEW_TODO 样式待优化 //NEW_TODO 样式待优化
clearLayout(m_descFrameLyt); clearLayout(m_descFrameLyt);
@ -154,7 +165,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
QLabel * descLabel = new QLabel(m_descFrame); QLabel * descLabel = new QLabel(m_descFrame);
descLabel->setTextFormat(Qt::PlainText); descLabel->setTextFormat(Qt::PlainText);
descLabel->setWordWrap(true); descLabel->setWordWrap(true);
QString show_desc = desc.key + ": " + desc.value; QString show_desc = desc.key + " " + desc.value;
descLabel->setText(show_desc); descLabel->setText(show_desc);
m_descFrameLyt->addWidget(descLabel); m_descFrameLyt->addWidget(descLabel);
} }
@ -162,8 +173,8 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
m_line_2->show(); m_line_2->show();
} }
clearLayout(m_actionFrameLyt); clearLayout(m_actionFrameLyt);
Q_FOREACH (auto action, info.actionList) { Q_FOREACH (SearchPluginIface::Actioninfo actioninfo, SearchPluginManager::getInstance()->getPlugin(plugin_name)->getActioninfo(info.type)) {
ActionLabel * actionLabel = new ActionLabel(action, info.key, plugin_name, m_actionFrame); ActionLabel * actionLabel = new ActionLabel(actioninfo.displayName, info.actionKey, actioninfo.actionkey, plugin_name, info.type, m_actionFrame);
m_actionFrameLyt->addWidget(actionLabel); m_actionFrameLyt->addWidget(actionLabel);
} }
m_actionFrame->show(); m_actionFrame->show();
@ -189,6 +200,8 @@ void DetailWidget::initUi()
m_iconLabel = new QLabel(this); m_iconLabel = new QLabel(this);
m_iconLabel->setFixedHeight(DETAIL_ICON_HEIGHT); m_iconLabel->setFixedHeight(DETAIL_ICON_HEIGHT);
m_iconLabel->setAlignment(Qt::AlignCenter); m_iconLabel->setAlignment(Qt::AlignCenter);
m_previewFrame = new QFrame(this);
m_previewFrameLyt = new QHBoxLayout(m_previewFrame);
m_nameFrame = new QFrame(this); m_nameFrame = new QFrame(this);
m_nameFrameLyt = new QHBoxLayout(m_nameFrame); m_nameFrameLyt = new QHBoxLayout(m_nameFrame);
@ -222,6 +235,7 @@ void DetailWidget::initUi()
m_actionFrameLyt->setContentsMargins(DETAIL_FRAME_MARGINS); m_actionFrameLyt->setContentsMargins(DETAIL_FRAME_MARGINS);
m_mainLyt->addWidget(m_iconLabel); m_mainLyt->addWidget(m_iconLabel);
m_mainLyt->addWidget(m_previewFrame, 0, Qt::AlignHCenter);
m_mainLyt->addWidget(m_nameFrame); m_mainLyt->addWidget(m_nameFrame);
m_mainLyt->addWidget(m_line_1); m_mainLyt->addWidget(m_line_1);
m_mainLyt->addWidget(m_descFrame); m_mainLyt->addWidget(m_descFrame);
@ -248,7 +262,7 @@ void DetailWidget::paintEvent(QPaintEvent * event)
void DetailWidget::clearLayout(QLayout *layout) void DetailWidget::clearLayout(QLayout *layout)
{ {
if(! layout) return; if(!layout) return;
QLayoutItem * child; QLayoutItem * child;
while((child = layout->takeAt(0)) != 0) { while((child = layout->takeAt(0)) != 0) {
if(child->widget()) { if(child->widget()) {
@ -259,11 +273,13 @@ void DetailWidget::clearLayout(QLayout *layout)
child = NULL; child = NULL;
} }
ActionLabel::ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent) : QLabel(parent) ActionLabel::ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type, QWidget *parent) : QLabel(parent)
{ {
m_action = action; m_action = action;
m_key = key; m_key = key;
m_plugin = plugin; m_actionKey = ActionKey;
m_type = type;
m_pluginId = pluginId;
this->initUi(); this->initUi();
this->installEventFilter(this); this->installEventFilter(this);
} }
@ -287,9 +303,9 @@ bool ActionLabel::eventFilter(QObject *watched, QEvent *event)
this->setForegroundRole(QPalette::Dark); this->setForegroundRole(QPalette::Dark);
return true; return true;
} else if(event->type() == QEvent::MouseButtonRelease) { } else if(event->type() == QEvent::MouseButtonRelease) {
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_pluginId);
if (plugin) if (plugin)
plugin->openAction(m_action, m_key); plugin->openAction(m_actionKey, m_key, m_type);
else else
qWarning()<<"Get plugin failed!"; qWarning()<<"Get plugin failed!";
this->setForegroundRole(QPalette::Light); this->setForegroundRole(QPalette::Light);

View File

@ -25,7 +25,7 @@
#include <QPainter> #include <QPainter>
#include <QStyleOption> #include <QStyleOption>
#include "result-view.h" #include "result-view.h"
#include "plugininterface/search-plugin-iface.h" #include "search-plugin-iface.h"
namespace Zeeker { namespace Zeeker {
class ResultArea : public QScrollArea class ResultArea : public QScrollArea
@ -70,6 +70,8 @@ private:
void clearLayout(QLayout *); void clearLayout(QLayout *);
QVBoxLayout * m_mainLyt = nullptr; QVBoxLayout * m_mainLyt = nullptr;
QLabel * m_iconLabel = nullptr; QLabel * m_iconLabel = nullptr;
QFrame *m_previewFrame = nullptr;
QHBoxLayout *m_previewFrameLyt = nullptr;
QFrame * m_nameFrame = nullptr; QFrame * m_nameFrame = nullptr;
QHBoxLayout * m_nameFrameLyt = nullptr; QHBoxLayout * m_nameFrameLyt = nullptr;
QLabel * m_nameLabel = nullptr; QLabel * m_nameLabel = nullptr;
@ -100,13 +102,15 @@ class ActionLabel : public QLabel
{ {
Q_OBJECT Q_OBJECT
public: public:
ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent = nullptr); ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type = 0, QWidget *parent = nullptr);
~ActionLabel() = default; ~ActionLabel() = default;
private: private:
void initUi(); void initUi();
QString m_action; QString m_action;
QString m_key; QString m_key;
QString m_plugin; int m_actionKey;
int m_type = 0;
QString m_pluginId;
protected: protected:
bool eventFilter(QObject *, QEvent *); bool eventFilter(QObject *, QEvent *);

View File

@ -21,8 +21,8 @@
#include "search-page.h" #include "search-page.h"
using namespace Zeeker; using namespace Zeeker;
#define RESULT_WIDTH 240 #define RESULT_WIDTH 266
#define DETAIL_WIDTH 400 #define DETAIL_WIDTH 374
SearchPage::SearchPage(QWidget *parent) : QWidget(parent) SearchPage::SearchPage(QWidget *parent) : QWidget(parent)
{ {
@ -54,6 +54,7 @@ void SearchPage::appendPlugin(const QString &plugin_id)
void SearchPage::initUi() void SearchPage::initUi()
{ {
m_splitter = new QSplitter(this); m_splitter = new QSplitter(this);
m_splitter->setContentsMargins(0, 0, 0, 0);
m_resultArea = new ResultArea(m_splitter); m_resultArea = new ResultArea(m_splitter);
m_detailArea = new DetailArea(m_splitter); m_detailArea = new DetailArea(m_splitter);
m_splitter->addWidget(m_resultArea); m_splitter->addWidget(m_resultArea);

View File

@ -20,13 +20,14 @@ DEFINES += QT_DEPRECATED_WARNINGS
# In order to do so, uncomment the following line. # In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt. # You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(../libsearch/libukui-search-headers.pri)
include(control/control.pri) include(control/control.pri)
include(model/model.pri) include(model/model.pri)
include(xatom/xatom.pri) include(xatom/xatom.pri)
include(singleapplication/qt-single-application.pri) include(singleapplication/qt-single-application.pri)
include(view/view.pri) include(view/view.pri)
SOURCES += \ SOURCES += \
main.cpp \ main.cpp \
mainwindow.cpp mainwindow.cpp

View File

@ -253,6 +253,14 @@ int main(int argc, char *argv[]) {
qDebug() << "Load translations file" << QLocale() << "failed!"; qDebug() << "Load translations file" << QLocale() << "failed!";
} }
QTranslator lib_translator;
try {
if(! lib_translator.load("/usr/share/ukui-search/translations/libukui-search_" + QLocale::system().name())) throw - 1;
app.installTranslator(&lib_translator);
} catch(...) {
qDebug() << "Load translations file" << QLocale() << "failed!";
}
//set main window to the center of screen //set main window to the center of screen
MainWindow *w = new MainWindow; MainWindow *w = new MainWindow;
qApp->setWindowIcon(QIcon::fromTheme("kylin-search")); qApp->setWindowIcon(QIcon::fromTheme("kylin-search"));
@ -275,7 +283,7 @@ int main(int argc, char *argv[]) {
QObject::connect(&app, &QtSingleApplication::messageReceived, w, &MainWindow::bootOptionsFilter); QObject::connect(&app, &QtSingleApplication::messageReceived, w, &MainWindow::bootOptionsFilter);
// Start app search thread // Start app search thread
AppMatch::getAppMatch()->start(); // AppMatch::getAppMatch()->start();
// NEW_TODO // NEW_TODO
// Set threads which are in the global thread pool to expire in 5ms, some problems here

View File

@ -37,17 +37,17 @@
#include "qt-single-application.h" #include "qt-single-application.h"
#include "global-settings.h" #include "global-settings.h"
#define MAIN_MARGINS 16,8,16,16 #define MAIN_MARGINS 16,16,16,16
#define TITLE_MARGINS 0,0,0,0 #define TITLE_MARGINS 0,0,0,0
#define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings" #define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
#define SEARCH_METHOD_KEY "indexSearch" #define SEARCH_METHOD_KEY "indexSearch"
#define WEB_ENGINE_KEY "webEngine" #define WEB_ENGINE_KEY "webEngine"
#define WINDOW_WIDTH 640 #define WINDOW_WIDTH 680
#define WINDOW_HEIGHT 590 #define WINDOW_HEIGHT 600
#define TITLE_HEIGHT 40 #define TITLE_HEIGHT 40
#define WINDOW_ICON_SIZE 24 #define WINDOW_ICON_SIZE 24
#define SETTING_BTN_SIZE 30 #define SETTING_BTN_SIZE 30
#define SEARCH_BAR_SIZE 44 #define SEARCH_BAR_SIZE 48
#define ASK_INDEX_TIME 5*1000 #define ASK_INDEX_TIME 5*1000
#define RESEARCH_TIME 10*1000 #define RESEARCH_TIME 10*1000
@ -95,7 +95,7 @@ MainWindow::MainWindow(QWidget *parent) :
//NEW_TODO, register plugins //NEW_TODO, register plugins
// SearchPluginManager::getInstance()->registerPlugin(\\); // SearchPluginManager::getInstance()->registerPlugin(\\);
// m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds()); // m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
m_stackedWidget->setPlugins(QStringList()<<"File"<<"Folder"); m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
} }
MainWindow::~MainWindow() { MainWindow::~MainWindow() {
@ -140,25 +140,25 @@ void MainWindow::initUi() {
mainlayout->setContentsMargins(MAIN_MARGINS); mainlayout->setContentsMargins(MAIN_MARGINS);
m_frame->setLayout(mainlayout); m_frame->setLayout(mainlayout);
m_titleFrame = new QFrame(m_frame);//标题栏 // m_titleFrame = new QFrame(m_frame);//标题栏
m_titleFrame->setFixedHeight(TITLE_HEIGHT); // m_titleFrame->setFixedHeight(TITLE_HEIGHT);
m_titleLyt = new QHBoxLayout(m_titleFrame); // m_titleLyt = new QHBoxLayout(m_titleFrame);
m_titleLyt->setContentsMargins(TITLE_MARGINS); // m_titleLyt->setContentsMargins(TITLE_MARGINS);
m_iconLabel = new QLabel(m_titleFrame); // m_iconLabel = new QLabel(m_titleFrame);
m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE); // m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE);
m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); // m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
m_titleLabel = new QLabel(m_titleFrame); // m_titleLabel = new QLabel(m_titleFrame);
m_titleLabel->setText(tr("Search")); // m_titleLabel->setText(tr("Search"));
m_settingsBtn = new QPushButton(m_titleFrame); // m_settingsBtn = new QPushButton(m_titleFrame);
m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE); // m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE);
m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic")); // m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic"));
m_settingsBtn->setProperty("useIconHighlightEffect", 0x2); // m_settingsBtn->setProperty("useIconHighlightEffect", 0x2);
m_settingsBtn->setProperty("isWindowButton", 0x01); // m_settingsBtn->setProperty("isWindowButton", 0x01);
m_settingsBtn->setFlat(true); // m_settingsBtn->setFlat(true);
m_titleLyt->addWidget(m_iconLabel); // m_titleLyt->addWidget(m_iconLabel);
m_titleLyt->addWidget(m_titleLabel); // m_titleLyt->addWidget(m_titleLabel);
m_titleLyt->addStretch(); // m_titleLyt->addStretch();
m_titleLyt->addWidget(m_settingsBtn); // m_titleLyt->addWidget(m_settingsBtn);
m_stackedWidget = new StackedWidget(m_frame);//内容栏 m_stackedWidget = new StackedWidget(m_frame);//内容栏
m_searchWidget = new SeachBarWidget(this); m_searchWidget = new SeachBarWidget(this);
@ -166,9 +166,9 @@ void MainWindow::initUi() {
m_searchWidget->setLayout(m_searchLayout); m_searchWidget->setLayout(m_searchLayout);
m_searchWidget->setFixedHeight(SEARCH_BAR_SIZE); m_searchWidget->setFixedHeight(SEARCH_BAR_SIZE);
mainlayout->addWidget(m_titleFrame); // mainlayout->addWidget(m_titleFrame);
mainlayout->addWidget(m_stackedWidget);
mainlayout->addWidget(m_searchWidget); mainlayout->addWidget(m_searchWidget);
mainlayout->addWidget(m_stackedWidget);
//创建索引询问弹窗 //创建索引询问弹窗
m_askDialog = new CreateIndexAskDialog(this); m_askDialog = new CreateIndexAskDialog(this);
@ -197,9 +197,9 @@ void MainWindow::initConnections()
}); });
connect(m_settingsBtn, &QPushButton::clicked, this, &MainWindow::settingsBtnClickedSlot); connect(m_settingsBtn, &QPushButton::clicked, this, &MainWindow::settingsBtnClickedSlot);
//主题改变时,更新自定义标题栏的图标 //主题改变时,更新自定义标题栏的图标
connect(qApp, &QApplication::paletteChanged, this, [ = ]() { // connect(qApp, &QApplication::paletteChanged, this, [ = ]() {
m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); // m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
}); // });
connect(m_searchLayout, &SearchBarHLayout::requestSearchKeyword, this, &MainWindow::searchKeywordSlot); connect(m_searchLayout, &SearchBarHLayout::requestSearchKeyword, this, &MainWindow::searchKeywordSlot);
connect(m_stackedWidget, &StackedWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord); connect(m_stackedWidget, &StackedWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord);
} }

View File

@ -24,7 +24,7 @@ using namespace Zeeker;
SearchResultManager::SearchResultManager(const QString& plugin_id, QObject *parent) : QObject(parent) SearchResultManager::SearchResultManager(const QString& plugin_id, QObject *parent) : QObject(parent)
{ {
m_plugin_id = plugin_id; m_plugin_id = plugin_id;
m_result_queue = new QQueue<SearchPluginIface::ResultInfo>; m_result_queue = new DataQueue<SearchPluginIface::ResultInfo>;
m_get_result_thread = new ReceiveResultThread(m_result_queue); m_get_result_thread = new ReceiveResultThread(m_result_queue);
initConnections(); initConnections();
} }
@ -39,59 +39,59 @@ void SearchResultManager::startSearch(const QString &keyword)
} }
m_result_queue->clear(); m_result_queue->clear();
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
// plugin->KeywordSearch(keyword, m_result_queue); plugin->KeywordSearch(keyword, m_result_queue);
/*********************测试用数据*********************/ /*********************测试用数据*********************/
SearchPluginIface::ResultInfo test_info; // SearchPluginIface::ResultInfo test_info;
if (m_plugin_id == "File") { // if (m_plugin_id == "File") {
test_info.icon = QIcon::fromTheme("ukui-control-center"); // test_info.icon = QIcon::fromTheme("ukui-control-center");
test_info.name = "搜索"; // test_info.name = "搜索";
QVector<SearchPluginIface::DescriptionInfo> desc; // QVector<SearchPluginIface::DescriptionInfo> desc;
SearchPluginIface::DescriptionInfo desc_1; // SearchPluginIface::DescriptionInfo desc_1;
desc_1.key = "描述"; // desc_1.key = "描述";
desc_1.value = "控制面板搜索插件"; // desc_1.value = "控制面板搜索插件";
desc.append(desc_1); // desc.append(desc_1);
QStringList actions; // QStringList actions;
actions.append("打开"); // actions.append("打开");
test_info.description = desc; // test_info.description = desc;
test_info.actionList = actions; // test_info.actionList = actions;
m_result_queue->append(test_info); // m_result_queue->append(test_info);
} else { // } else {
test_info.icon = QIcon::fromTheme("unknown"); // test_info.icon = QIcon::fromTheme("unknown");
test_info.name = "文件12345abcde.txt"; // test_info.name = "文件12345abcde.txt";
QVector<SearchPluginIface::DescriptionInfo> desc; // QVector<SearchPluginIface::DescriptionInfo> desc;
SearchPluginIface::DescriptionInfo desc_1; // SearchPluginIface::DescriptionInfo desc_1;
SearchPluginIface::DescriptionInfo desc_2; // SearchPluginIface::DescriptionInfo desc_2;
desc_1.key = "描述"; // desc_1.key = "描述";
desc_1.value = "一个文件"; // desc_1.value = "一个文件";
desc_2.key = "路径"; // desc_2.key = "路径";
desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt"; // desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt";
desc.append(desc_1); // desc.append(desc_1);
desc.append(desc_2); // desc.append(desc_2);
QStringList actions; // QStringList actions;
actions.append("打开"); // actions.append("打开");
actions.append("复制路径"); // actions.append("复制路径");
test_info.description = desc; // test_info.description = desc;
test_info.actionList = actions; // test_info.actionList = actions;
SearchPluginIface::ResultInfo test_info_1 = test_info; // SearchPluginIface::ResultInfo test_info_1 = test_info;
test_info_1.name = "文件1"; // test_info_1.name = "文件1";
SearchPluginIface::ResultInfo test_info_2 = test_info; // SearchPluginIface::ResultInfo test_info_2 = test_info;
test_info_2.name = "文件2"; // test_info_2.name = "文件2";
SearchPluginIface::ResultInfo test_info_3 = test_info; // SearchPluginIface::ResultInfo test_info_3 = test_info;
test_info_3.name = "文件3"; // test_info_3.name = "文件3";
SearchPluginIface::ResultInfo test_info_4 = test_info; // SearchPluginIface::ResultInfo test_info_4 = test_info;
test_info_4.name = "文件4"; // test_info_4.name = "文件4";
SearchPluginIface::ResultInfo test_info_5 = test_info; // SearchPluginIface::ResultInfo test_info_5 = test_info;
test_info_5.name = "文件5"; // test_info_5.name = "文件5";
SearchPluginIface::ResultInfo test_info_6 = test_info; // SearchPluginIface::ResultInfo test_info_6 = test_info;
test_info_6.name = "文件6"; // test_info_6.name = "文件6";
m_result_queue->append(test_info); // m_result_queue->append(test_info);
m_result_queue->append(test_info_1); // m_result_queue->append(test_info_1);
m_result_queue->append(test_info_2); // m_result_queue->append(test_info_2);
m_result_queue->append(test_info_3); // m_result_queue->append(test_info_3);
m_result_queue->append(test_info_4); // m_result_queue->append(test_info_4);
m_result_queue->append(test_info_5); // m_result_queue->append(test_info_5);
m_result_queue->append(test_info_6); // m_result_queue->append(test_info_6);
} // }
/********************测试用数据********************/ /********************测试用数据********************/
} }
@ -112,7 +112,7 @@ void SearchResultManager::initConnections()
connect(m_get_result_thread, &ReceiveResultThread::gotResultInfo, this, &SearchResultManager::gotResultInfo); connect(m_get_result_thread, &ReceiveResultThread::gotResultInfo, this, &SearchResultManager::gotResultInfo);
} }
ReceiveResultThread::ReceiveResultThread(QQueue<SearchPluginIface::ResultInfo> * result_queue, QObject *parent) ReceiveResultThread::ReceiveResultThread(DataQueue<SearchPluginIface::ResultInfo> * result_queue, QObject *parent)
{ {
m_result_queue = result_queue; m_result_queue = result_queue;
} }

View File

@ -32,14 +32,14 @@ namespace Zeeker {
class ReceiveResultThread : public QThread { class ReceiveResultThread : public QThread {
Q_OBJECT Q_OBJECT
public: public:
ReceiveResultThread(QQueue<SearchPluginIface::ResultInfo> * result_queue, QObject * parent = nullptr); ReceiveResultThread(DataQueue<SearchPluginIface::ResultInfo> * result_queue, QObject * parent = nullptr);
~ReceiveResultThread() = default; ~ReceiveResultThread() = default;
void stop(); void stop();
protected: protected:
void run() override; void run() override;
private: private:
QQueue<SearchPluginIface::ResultInfo> * m_result_queue; DataQueue<SearchPluginIface::ResultInfo> * m_result_queue;
Q_SIGNALS: Q_SIGNALS:
void gotResultInfo(const SearchPluginIface::ResultInfo&); void gotResultInfo(const SearchPluginIface::ResultInfo&);
@ -60,7 +60,7 @@ public Q_SLOTS:
private: private:
void initConnections(); void initConnections();
QString m_plugin_id; QString m_plugin_id;
QQueue<SearchPluginIface::ResultInfo> * m_result_queue; DataQueue<SearchPluginIface::ResultInfo> * m_result_queue;
ReceiveResultThread * m_get_result_thread = nullptr; ReceiveResultThread * m_get_result_thread = nullptr;
Q_SIGNALS: Q_SIGNALS:

View File

@ -126,14 +126,14 @@ const bool &SearchResultModel::isExpanded()
QStringList SearchResultModel::getActions(const QModelIndex &index) QStringList SearchResultModel::getActions(const QModelIndex &index)
{ {
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0) if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
return m_item->m_result_info_list.at(index.row()).actionList; // return m_item->m_result_info_list.at(index.row()).actionList;
return QStringList(); return QStringList();
} }
QString SearchResultModel::getKey(const QModelIndex &index) QString SearchResultModel::getKey(const QModelIndex &index)
{ {
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0) if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
return m_item->m_result_info_list.at(index.row()).key; // return m_item->m_result_info_list.at(index.row()).key;
return NULL; return NULL;
} }

Binary file not shown.

View File

@ -12,6 +12,13 @@ void ResultViewDelegate::setSearchKeyword(const QString &regFindKeyWords)
m_regFindKeyWords = regFindKeyWords; m_regFindKeyWords = regFindKeyWords;
} }
QSize ResultViewDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const
{
    // Take the hint computed by the base delegate and make each row 10 units
    // taller; the width is passed through unchanged.
    const QSize baseHint = QStyledItemDelegate::sizeHint(option, index);
    return QSize(baseHint.width(), baseHint.height() + 10);
}
void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const { void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const {
QStyleOptionViewItemV4 optionV4 = option; QStyleOptionViewItemV4 optionV4 = option;
initStyleOption(&optionV4, index); initStyleOption(&optionV4, index);
@ -30,7 +37,7 @@ void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem &
ctx.palette.setColor(QPalette::Text, optionV4.palette.color(QPalette::Active, QPalette::HighlightedText)); ctx.palette.setColor(QPalette::Text, optionV4.palette.color(QPalette::Active, QPalette::HighlightedText));
QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4); QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4);
textRect.adjust(0, -5, 0, 0); textRect.adjust(0, 0, 0, 0);
painter->save(); painter->save();
painter->translate(textRect.topLeft()); painter->translate(textRect.topLeft());
painter->setClipRect(textRect.translated(-textRect.topLeft())); painter->setClipRect(textRect.translated(-textRect.topLeft()));

View File

@ -35,6 +35,8 @@ public:
explicit ResultViewDelegate(QObject *parent = nullptr); explicit ResultViewDelegate(QObject *parent = nullptr);
~ResultViewDelegate() = default; ~ResultViewDelegate() = default;
void setSearchKeyword(const QString &); void setSearchKeyword(const QString &);
protected:
QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const;
private: private:
QString m_regFindKeyWords = 0; QString m_regFindKeyWords = 0;
void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override; void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override;

View File

@ -155,11 +155,11 @@ void ResultView::onRowDoubleClickedSlot(const QModelIndex &index)
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
try { try {
if (plugin) { if (plugin) {
if (!info.actionList.isEmpty()) { // if (!info.actionList.isEmpty()) {
plugin->openAction(info.actionList.at(0), info.key); // plugin->openAction(info.actionList.at(0), info.key);
} else { // } else {
throw -2; // throw -2;
} // }
} else { } else {
throw -1; throw -1;
} }
@ -211,7 +211,7 @@ void ResultView::onMenuTriggered(QAction *action)
//NEW_TODO 接口调整后需要修改 //NEW_TODO 接口调整后需要修改
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
if (plugin) { if (plugin) {
plugin->openAction(action->text(), m_model->getKey(this->currentIndex())); // plugin->openAction(action->text(), m_model->getKey(this->currentIndex()));
} else { } else {
qWarning()<<"Get plugin failed!"; qWarning()<<"Get plugin failed!";
} }

View File

@ -30,12 +30,12 @@ ChineseSegmentation::ChineseSegmentation() {
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8"; const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8"; const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8"; const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
m_jieba = new cppjieba::Jieba(DICT_PATH, m_jieba = new cppjieba::Jieba(DICT_PATH,
HMM_PATH, HMM_PATH,
USER_DICT_PATH, USER_DICT_PATH,
IDF_PATH, IDF_PATH,
STOP_WORD_PATH); STOP_WORD_PATH,
"");
} }
ChineseSegmentation::~ChineseSegmentation() { ChineseSegmentation::~ChineseSegmentation() {
@ -58,7 +58,7 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
// str.squeeze(); // str.squeeze();
const size_t topk = -1; const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres; std::vector<cppjieba::KeyWord> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk); ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
std::string().swap(s); std::string().swap(s);
QVector<SKeyWord> vecNeeds; QVector<SKeyWord> vecNeeds;
@ -66,13 +66,20 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
keywordres.clear(); keywordres.clear();
// keywordres.shrink_to_fit(); // keywordres.shrink_to_fit();
return vecNeeds; return vecNeeds;
} }
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw) { std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
const size_t topk = -1;
std::vector<cppjieba::KeyWord> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
return keywordres;
}
void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
for(auto i : keywordres) { for(auto i : keywordres) {
SKeyWord temp; SKeyWord temp;
temp.word = i.word; temp.word = i.word;

View File

@ -48,7 +48,10 @@ public:
static ChineseSegmentation *getInstance(); static ChineseSegmentation *getInstance();
~ChineseSegmentation(); ~ChineseSegmentation();
QVector<SKeyWord> callSegement(std::string s); QVector<SKeyWord> callSegement(std::string s);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw); //新添加callSegementStd函数修改返回值为stdvector<cppjieba::KeywordExtractor::Word>并简化内部处理流程--jxx20210517
//修改函数入参形式为引用去掉Qstring与std::string转换代码--jxx20210519
std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
private: private:
static QMutex m_mutex; static QMutex m_mutex;
cppjieba::Jieba *m_jieba; cppjieba::Jieba *m_jieba;

View File

@ -0,0 +1,511 @@
#pragma once
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <QDebug>
#include <algorithm>
#include <utility>
#include "limonp/Md5.hpp"
#include "Unicode.hpp"
#include "darts.h"
namespace cppjieba {
using std::pair;
/// One dictionary entry (word, tag, weight) as handed to DatTrie when a
/// cache file is built.
struct DatElement {
    std::string word;
    std::string tag;
    double weight = 0;

    /// Sort order: `word` ascending; entries that share the same word are
    /// ordered by `weight` descending.
    bool operator < (const DatElement & rhs) const {
        return (word != rhs.word) ? (word < rhs.word) : (weight > rhs.weight);
    }
};
/// One (word, idf) entry as handed to DatTrie when an IDF cache file is built.
struct IdfElement {
    std::string word;
    double idf = 0;

    /// Sort order: `word` ascending; entries that share the same word are
    /// ordered by `idf` descending.
    bool operator < (const IdfElement & rhs) const {
        return (word != rhs.word) ? (word < rhs.word) : (idf > rhs.idf);
    }
};
/// Streams a DatElement as "word=<word>/tag=<tag>/weight=<weight>".
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
    os << "word=" << elem.word;
    os << "/tag=" << elem.tag;
    os << "/weight=" << elem.weight;
    return os;
}
/// Fixed-size per-word record stored verbatim in the cache file: a weight
/// plus a tag truncated to at most 7 bytes (the 8th byte stays NUL).
struct DatMemElem {
    double weight = 0.0;
    char tag[8] = {};

    /// Stores `str` in `tag`, truncating to sizeof(tag) - 1 bytes. The buffer
    /// is zeroed first, so the stored tag is always NUL-terminated.
    void SetTag(const std::string & str) {
        const auto copy_len = std::min(str.size(), sizeof(tag) - 1);
        memset(tag, 0, sizeof(tag));
        strncpy(tag, str.c_str(), copy_len);
    }

    /// Returns the stored tag as a string (empty if none was set).
    std::string GetTag() const {
        return std::string(tag);
    }
};
/// Streams a DatMemElem as "/tag=<tag>/weight=<weight>".
inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
    os << "/tag=" << elem.GetTag();
    os << "/weight=" << elem.weight;
    return os;
}
/// Per-rune node of the word graph produced by DatTrie::Find / Find_Reverse.
struct DatDag {
    // Candidate words starting at this rune, as (index one past the word's
    // last rune, dictionary element). Entry 0 is always the single-rune
    // candidate; its element stays nullptr when that rune is not in the
    // dictionary.
    limonp::LocalVector<std::pair<size_t, const DatMemElem *> > nexts;
    // The two fields below are not written anywhere in this header; they are
    // presumably filled by the segmenter that consumes the graph (TODO confirm).
    // They are explicitly zeroed so a default-initialized DatDag never carries
    // indeterminate values (same values that vector::resize() already yields).
    double max_weight = 0.0;
    int max_next = 0;
};
// Double-array trie type (declared in darts.h) used by DatTrie for all lookups.
typedef Darts::DoubleArray JiebaDAT;
// Header placed at the very start of every cache file. DatTrie writes it as
// raw bytes when building a cache and reinterprets the first bytes of the
// mapped file as this struct when attaching, so the field order and sizes
// below are the on-disk format.
struct CacheFileHeader {
// Digest string of the source dictionaries supplied by the caller; copied and
// compared as exactly 32 raw bytes (no NUL terminator is stored).
char md5_hex[32] = {};
// Value of DatTrie::min_weight_ at the time the cache was built.
double min_weight = 0;
// Number of payload records (DatMemElem or double) that follow the header.
uint32_t elements_num = 0;
// Number of double-array units that follow the payload (dat_.size()).
uint32_t dat_size = 0;
};
// The cache file stores DatMemElem records as raw bytes, so their size is pinned.
static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
// Header size must be a whole multiple of the record size — presumably so the
// record array that follows the header stays naturally aligned (TODO confirm).
static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
class DatTrie {
public:
DatTrie() {}
~DatTrie() {
::munmap(mmap_addr_, mmap_length_);
mmap_addr_ = nullptr;
mmap_length_ = 0;
::close(mmap_fd_);
mmap_fd_ = -1;
}
const DatMemElem * Find(const string & key) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return nullptr;
}
return &elements_ptr_[ find_result.value ];
}
const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return -1;
}
return idf_elements_ptr_[ find_result.value ];
}
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<struct DatDag>&res, size_t max_word_len) const {
res.clear();
res.resize(end - begin);
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {};
for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
if (1 == char_num) {
res[i].nexts[0].second = pValue;
continue;
}
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
}
begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
}
}
void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<struct DatDag>&res, size_t max_word_len) const {
res.clear();
res.resize(end - begin);
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {};
size_t str_size = end - begin;
for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
begin_pos -= (end - i - 1)->len;
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - i, nullptr));
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
if (1 == char_num) {
res[str_size - i - 1].nexts[0].second = pValue;
continue;
}
res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - 1 - i + char_num, pValue));
}
}
}
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<WordRange>& words, size_t max_word_len) const {
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {};//存放字典查询结果
size_t str_size = end - begin;
double max_weight[str_size];//存放逆向路径最大weight
for (size_t i = 0; i<str_size; i++) {
max_weight[i] = -3.14e+100;
}
int max_next[str_size];//存放动态规划后的分词结果
memset(max_next,-1,str_size);
double val(0);
for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
size_t nextPos = str_size - i;//逆向计算
begin_pos -= (end - i - 1)->len;
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
if (0 == num_results) {//字典不存在则单独分词
val = min_weight_;
if (nextPos < str_size) {
val += max_weight[nextPos];
}
if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos;
}
} else {//字典存在则根据查询结果数量计算最大概率路径
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
val = pValue->weight;
if (1 == char_num) {
if (nextPos < str_size) {
val += max_weight[nextPos];
}
if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos;
}
} else {
if (nextPos - 1 + char_num < str_size) {
val += max_weight[nextPos - 1 + char_num];
}
if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos - 1 + char_num;
}
}
}
}
}
for (size_t i = 0; i < str_size;) {//统计动态规划结果
assert(max_next[i] > i);
assert(max_next[i] <= str_size);
WordRange wr(begin + i, begin + max_next[i] - 1);
words.push_back(wr);
i = max_next[i];
}
}
double GetMinWeight() const {
return min_weight_;
}
void SetMinWeight(double d) {
min_weight_ = d ;
}
bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
BuildDatCache(elements, dat_cache_file, md5);
return InitAttachDat(dat_cache_file, md5);
}
bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
BuildDatCache(elements, dat_cache_file, md5);
return InitIdfAttachDat(dat_cache_file, md5);
}
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem) + header.dat_size * dat_.unit_size());
elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size());
idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
private:
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
std::sort(elements.begin(), elements.end());
vector<const char*> keys_ptr_vec;
vector<int> values_vec;
vector<DatMemElem> mem_elem_vec;
keys_ptr_vec.reserve(elements.size());
values_vec.reserve(elements.size());
mem_elem_vec.reserve(elements.size());
CacheFileHeader header;
header.min_weight = min_weight_;
assert(sizeof(header.md5_hex) == md5.size());
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
for (size_t i = 0; i < elements.size(); ++i) {
keys_ptr_vec.push_back(elements[i].word.data());
values_vec.push_back(i);
mem_elem_vec.push_back(DatMemElem());
auto & mem_elem = mem_elem_vec.back();
mem_elem.weight = elements[i].weight;
mem_elem.SetTag(elements[i].tag);
}
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
assert(0 == ret);
header.elements_num = mem_elem_vec.size();
header.dat_size = dat_.size();
{
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
::umask(S_IWGRP | S_IWOTH);
//const int fd =::mkstemp(&tmp_filepath[0]);
//原mkstemp用法有误已修复--jxx20210519
const int fd =::mkstemp((char *)tmp_filepath.data());
qDebug() << "mkstemp :" << errno << tmp_filepath.data();
assert(fd >= 0);
::fchmod(fd, 0644);
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
::close(fd);
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
assert(0 == rename_ret);
}
}
void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
std::sort(elements.begin(), elements.end());
vector<const char*> keys_ptr_vec;
vector<int> values_vec;
vector<double> mem_elem_vec;
keys_ptr_vec.reserve(elements.size());
values_vec.reserve(elements.size());
mem_elem_vec.reserve(elements.size());
CacheFileHeader header;
header.min_weight = min_weight_;
assert(sizeof(header.md5_hex) == md5.size());
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
for (size_t i = 0; i < elements.size(); ++i) {
keys_ptr_vec.push_back(elements[i].word.data());
values_vec.push_back(i);
mem_elem_vec.push_back(elements[i].idf);
}
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
assert(0 == ret);
header.elements_num = mem_elem_vec.size();
header.dat_size = dat_.size();
{
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
::umask(S_IWGRP | S_IWOTH);
//const int fd =::mkstemp(&tmp_filepath[0]);
//原mkstemp用法有误已修复--jxx20210519
const int fd =::mkstemp((char *)tmp_filepath.data());
qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
assert(fd >= 0);
::fchmod(fd, 0644);
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size());
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size());
::close(fd);
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
assert(0 == rename_ret);
}
}
DatTrie(const DatTrie &);
DatTrie &operator=(const DatTrie &);
private:
JiebaDAT dat_;
const DatMemElem * elements_ptr_ = nullptr;
const double * idf_elements_ptr_= nullptr;
size_t elements_num_ = 0;
double min_weight_ = 0;
int mmap_fd_ = -1;
size_t mmap_length_ = 0;
char * mmap_addr_ = nullptr;
};
inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
limonp::MD5 md5;
const auto files = limonp::Split(files_list, "|;");
file_size_sum = 0;
for (auto const & local_path : files) {
const int fd = ::open(local_path.c_str(), O_RDONLY);
if( fd < 0){
continue;
}
auto const len = ::lseek(fd, 0, SEEK_END);
if (len > 0) {
void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
assert(MAP_FAILED != addr);
md5.Update((unsigned char *) addr, len);
file_size_sum += len;
::munmap(addr, len);
}
::close(fd);
}
md5.Final();
return string(md5.digestChars);
}
}

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_DICT_TRIE_HPP
#define CPPJIEBA_DICT_TRIE_HPP
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@ -31,8 +12,8 @@
#include "limonp/StringUtil.hpp" #include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp" #include "limonp/Logging.hpp"
#include "Unicode.hpp" #include "Unicode.hpp"
#include "Trie.hpp" #include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba { namespace cppjieba {
using namespace limonp; using namespace limonp;
@ -50,58 +31,29 @@ public:
WordWeightMax, WordWeightMax,
}; // enum UserWordWeightOption }; // enum UserWordWeightOption
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) { DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
Init(dict_path, user_dict_paths, user_word_weight_opt); UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
} }
~DictTrie() { ~DictTrie() {}
delete trie_;
}
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { const DatMemElem* Find(const string & word) const {
DictUnit node_info; return dat_.Find(word);
if(!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false;
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
if(!MakeNodeInfo(node_info, word, weight, tag)) {
return false;
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end);
} }
void Find(RuneStrArray::const_iterator begin, void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<struct Dag>&res, vector<struct DatDag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
trie_->Find(begin, end, res, max_word_len); dat_.Find(begin, end, res, max_word_len);
} }
bool Find(const string& word) { void Find(RuneStrArray::const_iterator begin,
const DictUnit *tmp = NULL; RuneStrArray::const_iterator end,
RuneStrArray runes; vector<WordRange>& words,
if(!DecodeRunesInString(word, runes)) { size_t max_word_len = MAX_WORD_LENGTH) const {
XLOG(ERROR) << "Decode failed."; dat_.Find(begin, end, words, max_word_len);
}
tmp = Find(runes.begin(), runes.end());
if(tmp == NULL) {
return false;
} else {
return true;
}
} }
bool IsUserDictSingleChineseWord(const Rune& word) const { bool IsUserDictSingleChineseWord(const Rune& word) const {
@ -109,182 +61,176 @@ public:
} }
double GetMinWeight() const { double GetMinWeight() const {
return min_weight_; return dat_.GetMinWeight();
} }
void InserUserDictNode(const string& line) { size_t GetTotalDictSize() const {
return total_dict_size_;
}
void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
vector<string> buf; vector<string> buf;
DictUnit node_info; DatElement node_info;
Split(line, buf, " "); Split(line, buf, " ");
if(buf.size() == 1) {
MakeNodeInfo(node_info, if (buf.size() == 0) {
buf[0], return;
user_word_default_weight_,
UNKNOWN_TAG);
} else if(buf.size() == 2) {
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
buf[1]);
} else if(buf.size() == 3) {
int freq = atoi(buf[1].c_str());
assert(freq_sum_ > 0.0);
double weight = log(1.0 * freq / freq_sum_);
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
} }
static_node_infos_.push_back(node_info);
if(node_info.word.size() == 1) { node_info.word = buf[0];
user_dict_single_chinese_word_.insert(node_info.word[0]); node_info.weight = user_word_default_weight_;
node_info.tag = UNKNOWN_TAG;
if (buf.size() == 2) {
node_info.tag = buf[1];
} else if (buf.size() == 3) {
if (freq_sum_ > 0.0) {
const int freq = atoi(buf[1].c_str());
node_info.weight = log(1.0 * freq / freq_sum_);
node_info.tag = buf[2];
}
}
if (saveNodeInfo) {
static_node_infos_.push_back(node_info);
}
if (Utf8CharNum(node_info.word) == 1) {
RuneArray word;
if (DecodeRunesInString(node_info.word, word)) {
user_dict_single_chinese_word_.insert(word[0]);
} else {
XLOG(ERROR) << "Decode " << node_info.word << " failed.";
}
} }
} }
void LoadUserDict(const vector<string>& buf) { void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
for(size_t i = 0; i < buf.size(); i++) {
InserUserDictNode(buf[i]);
}
}
void LoadUserDict(const set<string>& buf) {
std::set<string>::const_iterator iter;
for(iter = buf.begin(); iter != buf.end(); iter++) {
InserUserDictNode(*iter);
}
}
void LoadUserDict(const string& filePaths) {
vector<string> files = limonp::Split(filePaths, "|;"); vector<string> files = limonp::Split(filePaths, "|;");
size_t lineno = 0;
for(size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str()); ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line; string line;
for(; getline(ifs, line); lineno++) { for (; getline(ifs, line);) {
if(line.size() == 0) { if (line.size() == 0) {
continue; continue;
} }
InserUserDictNode(line);
InserUserDictNode(line, saveNodeInfo);
} }
} }
} }
private: private:
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
LoadDict(dict_path); UserWordWeightOption user_word_weight_opt) {
const auto dict_list = dict_path + "|" + user_dict_paths;
size_t file_size_sum = 0;
const string md5 = CalcFileListMD5(dict_list, file_size_sum);
if (dat_cache_path.empty()) {
//未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
}
QString path = QString::fromStdString(dat_cache_path);
qDebug() << "#########Dict path:" << path;
if (dat_.InitAttachDat(dat_cache_path, md5)) {
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
total_dict_size_ = file_size_sum;
return;
}
LoadDefaultDict(dict_path);
freq_sum_ = CalcFreqSum(static_node_infos_); freq_sum_ = CalcFreqSum(static_node_infos_);
CalculateWeight(static_node_infos_, freq_sum_); CalculateWeight(static_node_infos_, freq_sum_);
SetStaticWordWeights(user_word_weight_opt); double min_weight = 0;
SetStaticWordWeights(user_word_weight_opt, min_weight);
dat_.SetMinWeight(min_weight);
if(user_dict_paths.size()) { LoadUserDict(user_dict_paths);
LoadUserDict(user_dict_paths); const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
} assert(build_ret);
Shrink(static_node_infos_); total_dict_size_ = file_size_sum;
CreateTrie(static_node_infos_); vector<DatElement>().swap(static_node_infos_);
} }
void CreateTrie(const vector<DictUnit>& dictUnits) { void LoadDefaultDict(const string& filePath) {
assert(dictUnits.size());
vector<Unicode> words;
vector<const DictUnit*> valuePointers;
for(size_t i = 0 ; i < dictUnits.size(); i ++) {
words.push_back(dictUnits[i].word);
valuePointers.push_back(&dictUnits[i]);
}
trie_ = new Trie(words, valuePointers);
}
bool MakeNodeInfo(DictUnit& node_info,
const string& word,
double weight,
const string& tag) {
if(!DecodeRunesInString(word, node_info.word)) {
XLOG(ERROR) << "Decode " << word << " failed.";
return false;
}
node_info.weight = weight;
node_info.tag = tag;
return true;
}
void LoadDict(const string& filePath) {
ifstream ifs(filePath.c_str()); ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed."; XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line; string line;
vector<string> buf; vector<string> buf;
DictUnit node_info; for (; getline(ifs, line);) {
for(size_t lineno = 0; getline(ifs, line); lineno++) {
Split(line, buf, " "); Split(line, buf, " ");
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line; XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
MakeNodeInfo(node_info, DatElement node_info;
buf[0], node_info.word = buf[0];
atof(buf[1].c_str()), node_info.weight = atof(buf[1].c_str());
buf[2]); node_info.tag = buf[2];
static_node_infos_.push_back(node_info); static_node_infos_.push_back(node_info);
} }
} }
static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) { static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
return lhs.weight < rhs.weight; return lhs.weight < rhs.weight;
} }
void SetStaticWordWeights(UserWordWeightOption option) { void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
XCHECK(!static_node_infos_.empty()); XCHECK(!static_node_infos_.empty());
vector<DictUnit> x = static_node_infos_; vector<DatElement> x = static_node_infos_;
sort(x.begin(), x.end(), WeightCompare); sort(x.begin(), x.end(), WeightCompare);
min_weight_ = x[0].weight; if(x.empty()){
max_weight_ = x[x.size() - 1].weight; return;
median_weight_ = x[x.size() / 2].weight; }
switch(option) { min_weight = x[0].weight;
case WordWeightMin: const double max_weight_ = x[x.size() - 1].weight;
user_word_default_weight_ = min_weight_; const double median_weight_ = x[x.size() / 2].weight;
break;
case WordWeightMedian: switch (option) {
user_word_default_weight_ = median_weight_; case WordWeightMin:
break; user_word_default_weight_ = min_weight;
default: break;
user_word_default_weight_ = max_weight_;
break; case WordWeightMedian:
user_word_default_weight_ = median_weight_;
break;
default:
user_word_default_weight_ = max_weight_;
break;
} }
} }
double CalcFreqSum(const vector<DictUnit>& node_infos) const { double CalcFreqSum(const vector<DatElement>& node_infos) const {
double sum = 0.0; double sum = 0.0;
for(size_t i = 0; i < node_infos.size(); i++) {
for (size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].weight; sum += node_infos[i].weight;
} }
return sum; return sum;
} }
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const { void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
assert(sum > 0.0); for (size_t i = 0; i < node_infos.size(); i++) {
for(size_t i = 0; i < node_infos.size(); i++) { DatElement& node_info = node_infos[i];
DictUnit& node_info = node_infos[i];
assert(node_info.weight > 0.0); assert(node_info.weight > 0.0);
node_info.weight = log(double(node_info.weight) / sum); node_info.weight = log(double(node_info.weight) / sum);
} }
} }
void Shrink(vector<DictUnit>& units) const { private:
vector<DictUnit>(units.begin(), units.end()).swap(units); vector<DatElement> static_node_infos_;
} size_t total_dict_size_ = 0;
DatTrie dat_;
vector<DictUnit> static_node_infos_;
deque<DictUnit> active_node_infos_; // must not be vector
Trie * trie_;
double freq_sum_; double freq_sum_;
double min_weight_;
double max_weight_;
double median_weight_;
double user_word_default_weight_; double user_word_default_weight_;
unordered_set<Rune> user_dict_single_chinese_word_; unordered_set<Rune> user_dict_single_chinese_word_;
}; };
} }
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_FULLSEGMENT_H
#define CPPJIEBA_FULLSEGMENT_H
#include <algorithm> #include <algorithm>
#include <set> #include <set>
@ -30,82 +11,48 @@
namespace cppjieba { namespace cppjieba {
class FullSegment: public SegmentBase { class FullSegment: public SegmentBase {
public: public:
FullSegment(const string& dictPath) {
dictTrie_ = new DictTrie(dictPath);
isNeedDestroy_ = true;
}
FullSegment(const DictTrie* dictTrie) FullSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) { : dictTrie_(dictTrie) {
assert(dictTrie_); assert(dictTrie_);
} }
~FullSegment() { ~FullSegment() { }
if(isNeedDestroy_) {
delete dictTrie_;
}
}
void Cut(const string& sentence,
vector<string>& words) const {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& res) const {
// result of searching in trie tree
LocalVector<pair<size_t, const DictUnit*> > tRes;
// max index of res's words virtual void Cut(RuneStrArray::const_iterator begin,
size_t maxIdx = 0; RuneStrArray::const_iterator end,
vector<WordRange>& res, bool, size_t) const override {
// always equals to (uItr - begin)
size_t uIdx = 0;
// tmp variables
size_t wordLen = 0;
assert(dictTrie_); assert(dictTrie_);
vector<struct Dag> dags; vector<struct DatDag> dags;
dictTrie_->Find(begin, end, dags); dictTrie_->Find(begin, end, dags);
for(size_t i = 0; i < dags.size(); i++) { size_t max_word_end_pos = 0;
for(size_t j = 0; j < dags[i].nexts.size(); j++) {
size_t nextoffset = dags[i].nexts[j].first; for (size_t i = 0; i < dags.size(); i++) {
for (const auto & kv : dags[i].nexts) {
const size_t nextoffset = kv.first - 1;
assert(nextoffset < dags.size()); assert(nextoffset < dags.size());
const DictUnit* du = dags[i].nexts[j].second; const auto wordLen = nextoffset - i + 1;
if(du == NULL) { const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i));
if(dags[i].nexts.size() == 1 && maxIdx <= uIdx) { const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr); if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) {
} WordRange wr(begin + i, begin + nextoffset);
} else { res.push_back(wr);
wordLen = du->word.size();
if(wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
} }
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
max_word_end_pos = max(max_word_end_pos, nextoffset + 1);
} }
uIdx++;
} }
} }
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private: private:
const DictTrie* dictTrie_; const DictTrie* dictTrie_;
bool isNeedDestroy_;
}; };
} }
#endif

View File

@ -1,26 +1,6 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_HMMMODEL_H
#define CPPJIEBA_HMMMODEL_H
#include "limonp/StringUtil.hpp" #include "limonp/StringUtil.hpp"
#include "Trie.hpp"
namespace cppjieba { namespace cppjieba {
@ -59,16 +39,18 @@ struct HMMModel {
XCHECK(GetLine(ifile, line)); XCHECK(GetLine(ifile, line));
Split(line, tmp, " "); Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM); XCHECK(tmp.size() == STATUS_SUM);
for(size_t j = 0; j < tmp.size(); j++) {
for (size_t j = 0; j < tmp.size(); j++) {
startProb[j] = atof(tmp[j].c_str()); startProb[j] = atof(tmp[j].c_str());
} }
//Load transProb //Load transProb
for(size_t i = 0; i < STATUS_SUM; i++) { for (size_t i = 0; i < STATUS_SUM; i++) {
XCHECK(GetLine(ifile, line)); XCHECK(GetLine(ifile, line));
Split(line, tmp, " "); Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM); XCHECK(tmp.size() == STATUS_SUM);
for(size_t j = 0; j < STATUS_SUM; j++) {
for (size_t j = 0; j < tmp.size(); j++) {
transProb[i][j] = atof(tmp[j].c_str()); transProb[i][j] = atof(tmp[j].c_str());
} }
} }
@ -92,43 +74,55 @@ struct HMMModel {
double GetEmitProb(const EmitProbMap* ptMp, Rune key, double GetEmitProb(const EmitProbMap* ptMp, Rune key,
double defVal)const { double defVal)const {
EmitProbMap::const_iterator cit = ptMp->find(key); EmitProbMap::const_iterator cit = ptMp->find(key);
if(cit == ptMp->end()) {
if (cit == ptMp->end()) {
return defVal; return defVal;
} }
return cit->second; return cit->second;
} }
bool GetLine(ifstream& ifile, string& line) { bool GetLine(ifstream& ifile, string& line) {
while(getline(ifile, line)) { while (getline(ifile, line)) {
Trim(line); Trim(line);
if(line.empty()) {
if (line.empty()) {
continue; continue;
} }
if(StartsWith(line, "#")) {
if (StartsWith(line, "#")) {
continue; continue;
} }
return true; return true;
} }
return false; return false;
} }
bool LoadEmitProb(const string& line, EmitProbMap& mp) { bool LoadEmitProb(const string& line, EmitProbMap& mp) {
if(line.empty()) { if (line.empty()) {
return false; return false;
} }
vector<string> tmp, tmp2; vector<string> tmp, tmp2;
Unicode unicode; RuneArray unicode;
Split(line, tmp, ","); Split(line, tmp, ",");
for(size_t i = 0; i < tmp.size(); i++) {
for (size_t i = 0; i < tmp.size(); i++) {
Split(tmp[i], tmp2, ":"); Split(tmp[i], tmp2, ":");
if(2 != tmp2.size()) {
if (2 != tmp2.size()) {
XLOG(ERROR) << "emitProb illegal."; XLOG(ERROR) << "emitProb illegal.";
return false; return false;
} }
if(!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
XLOG(ERROR) << "TransCode failed."; XLOG(ERROR) << "TransCode failed.";
return false; return false;
} }
mp[unicode[0]] = atof(tmp2[1].c_str()); mp[unicode[0]] = atof(tmp2[1].c_str());
} }
return true; return true;
} }
@ -144,4 +138,3 @@ struct HMMModel {
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIBEA_HMMSEGMENT_H
#define CPPJIBEA_HMMSEGMENT_H
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@ -29,58 +10,40 @@
namespace cppjieba { namespace cppjieba {
class HMMSegment: public SegmentBase { class HMMSegment: public SegmentBase {
public: public:
HMMSegment(const string& filePath)
: model_(new HMMModel(filePath)), isNeedDestroy_(true) {
}
HMMSegment(const HMMModel* model) HMMSegment(const HMMModel* model)
: model_(model), isNeedDestroy_(false) { : model_(model) {
}
~HMMSegment() {
if(isNeedDestroy_) {
delete model_;
}
} }
~HMMSegment() { }
void Cut(const string& sentence, virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
vector<string>& words) const { size_t) const override {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right = begin; RuneStrArray::const_iterator right = begin;
while(right != end) {
if(right->rune < 0x80) { while (right != end) {
if(left != right) { if (right->rune < 0x80) { //asc码
if (left != right) {
InternalCut(left, right, res); InternalCut(left, right, res);
} }
left = right; left = right;
do { do {
right = SequentialLetterRule(left, end); right = SequentialLetterRule(left, end);//非英文字符则返回left否则返回left后非英文字母的位置
if(right != left) {
if (right != left) {
break; break;
} }
right = NumbersRule(left, end);
if(right != left) { right = NumbersRule(left, end);//非数字则返回left否则返回left后非数字的位置
if (right != left) {
break; break;
} }
right ++; right ++;
} while(false); } while (false);
WordRange wr(left, right - 1); WordRange wr(left, right - 1);
res.push_back(wr); res.push_back(wr);
left = right; left = right;
@ -88,45 +51,64 @@ public:
right++; right++;
} }
} }
if(left != right) {
if (left != right) {
InternalCut(left, right, res); InternalCut(left, right, res);
} }
} }
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private: private:
// sequential letters rule // sequential letters rule
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end) const {
Rune x = begin->rune; Rune x = begin->rune;
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++; begin ++;
} else { } else {
return begin; return begin;
} }
while(begin != end) {
while (begin != end) {
x = begin->rune; x = begin->rune;
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
begin ++; begin ++;
} else { } else {
break; break;
} }
} }
return begin; return begin;
} }
// //
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune; Rune x = begin->rune;
if('0' <= x && x <= '9') {
if ('0' <= x && x <= '9') {
begin ++; begin ++;
} else { } else {
return begin; return begin;
} }
while(begin != end) {
while (begin != end) {
x = begin->rune; x = begin->rune;
if(('0' <= x && x <= '9') || x == '.') {
if (('0' <= x && x <= '9') || x == '.') {
begin++; begin++;
} else { } else {
break; break;
} }
} }
return begin; return begin;
} }
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const { void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
@ -135,8 +117,9 @@ private:
RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right; RuneStrArray::const_iterator right;
for(size_t i = 0; i < status.size(); i++) {
if(status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i]) for (size_t i = 0; i < status.size(); i++) {
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
right = begin + i + 1; right = begin + i + 1;
WordRange wr(left, right - 1); WordRange wr(left, right - 1);
res.push_back(wr); res.push_back(wr);
@ -155,27 +138,31 @@ private:
size_t now, old, stat; size_t now, old, stat;
double tmp, endE, endS; double tmp, endE, endS;
vector<int> path(XYSize); //vector<int> path(XYSize);
vector<double> weight(XYSize); //vector<double> weight(XYSize);
int path[XYSize];
double weight[XYSize];
//start //start
for(size_t y = 0; y < Y; y++) { for (size_t y = 0; y < Y; y++) {
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE); weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
path[0 + y * X] = -1; path[0 + y * X] = -1;
} }
double emitProb; double emitProb;
for(size_t x = 1; x < X; x++) { for (size_t x = 1; x < X; x++) {
for(size_t y = 0; y < Y; y++) { for (size_t y = 0; y < Y; y++) {
now = x + y * X; now = x + y * X;
weight[now] = MIN_DOUBLE; weight[now] = MIN_DOUBLE;
path[now] = HMMModel::E; // warning path[now] = HMMModel::E; // warning
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE); emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
for(size_t preY = 0; preY < Y; preY++) {
for (size_t preY = 0; preY < Y; preY++) {
old = x - 1 + preY * X; old = x - 1 + preY * X;
tmp = weight[old] + model_->transProb[preY][y] + emitProb; tmp = weight[old] + model_->transProb[preY][y] + emitProb;
if(tmp > weight[now]) {
if (tmp > weight[now]) {
weight[now] = tmp; weight[now] = tmp;
path[now] = preY; path[now] = preY;
} }
@ -186,23 +173,23 @@ private:
endE = weight[X - 1 + HMMModel::E * X]; endE = weight[X - 1 + HMMModel::E * X];
endS = weight[X - 1 + HMMModel::S * X]; endS = weight[X - 1 + HMMModel::S * X];
stat = 0; stat = 0;
if(endE >= endS) {
if (endE >= endS) {
stat = HMMModel::E; stat = HMMModel::E;
} else { } else {
stat = HMMModel::S; stat = HMMModel::S;
} }
status.resize(X); status.resize(X);
for(int x = X - 1 ; x >= 0; x--) {
for (int x = X - 1 ; x >= 0; x--) {
status[x] = stat; status[x] = stat;
stat = path[x + stat * X]; stat = path[x + stat * X];
} }
} }
const HMMModel* model_; const HMMModel* model_;
bool isNeedDestroy_;
}; // class HMMSegment }; // class HMMSegment
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -0,0 +1,134 @@
#pragma once
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp"
#include "Unicode.hpp"
#include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba {
using namespace limonp;
const size_t IDF_COLUMN_NUM = 2;
// Word -> idf lookup table stored in a DatTrie.
//
// On construction the trie is either attached from an on-disk cache file or
// built from the plain-text dictionary (one "<word> <idf>" pair per line) and
// then written to that cache. The mean idf of the dictionary is published in
// idfAverage_ for callers that need a fallback weight for unknown words.
class IdfTrie {
public:
    enum UserWordWeightOption {
        WordWeightMin,
        WordWeightMedian,
        WordWeightMax,
    }; // enum UserWordWeightOption

    // dict_path:            plain-text idf dictionary.
    // dat_cache_path:       cache file to attach/build; empty selects a default
    //                       location derived from the dictionary's MD5.
    // user_word_weight_opt: only used here as part of the default cache file name.
    IdfTrie(const string& dict_path, const string & dat_cache_path = "",
            UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
        Init(dict_path, dat_cache_path, user_word_weight_opt);
    }

    ~IdfTrie() {}

    // Forwards to DatTrie::Find and returns its result unchanged.
    // NOTE(review): presumably a negative value signals "not found" -- confirm
    // against DatTrie::Find before relying on a specific sentinel.
    double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
        return dat_.Find(word, length, node_pos);
    }

    // Forwards the range lookup to DatTrie::Find, filling res.
    void Find(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<struct DatDag>&res,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        dat_.Find(begin, end, res, max_word_len);
    }

    // True when word is in user_dict_single_chinese_word_. Nothing in this
    // class inserts into that set, so as written this always reports false.
    bool IsUserDictSingleChineseWord(const Rune& word) const {
        return IsIn(user_dict_single_chinese_word_, word);
    }

    double GetMinWeight() const {
        return dat_.GetMinWeight();
    }

    // Value reported by CalcFileListMD5 for the dictionary file(s) at Init time.
    size_t GetTotalDictSize() const {
        return total_dict_size_;
    }

private:
    // Attaches the cached trie when it matches the dictionary's MD5, otherwise
    // builds the trie from the dictionary text. In both cases idfAverage_ and
    // total_dict_size_ are assigned before returning.
    void Init(const string& dict_path, string dat_cache_path,
              UserWordWeightOption user_word_weight_opt) {
        size_t file_size_sum = 0;
        const string md5 = CalcFileListMD5(dict_path, file_size_sum);

        if (dat_cache_path.empty()) {
            // When no cache location is specified, the dictionary data file is
            // stored under /tmp by default.
            // NOTE(review): a predictable file name in a world-writable
            // directory can be pre-created by another user; consider a
            // per-user cache directory.
            dat_cache_path = "/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
        }

        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Idf path:" << path;

        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
            total_dict_size_ = file_size_sum;
            // This branch used to return without ever assigning idfAverage_,
            // so readers of the public member got an indeterminate value
            // whenever the cache was reused. Recompute the mean from the
            // dictionary text so both paths publish the same number.
            LoadDefaultIdf(dict_path);
            idfAverage_ = AverageIdf(static_node_infos_);
            vector<IdfElement>().swap(static_node_infos_);
            return;
        }

        LoadDefaultIdf(dict_path);
        assert(static_node_infos_.size());
        idfAverage_ = AverageIdf(static_node_infos_);
        assert(idfAverage_ > 0.0);

        double min_weight = 0;
        dat_.SetMinWeight(min_weight);
        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
        assert(build_ret);
        (void)build_ret; // only inspected by the assert; keeps NDEBUG builds warning-free
        total_dict_size_ = file_size_sum;
        // Swap with an empty temporary to release the parsed entries' storage.
        vector<IdfElement>().swap(static_node_infos_);
    }

    // Parses the dictionary into static_node_infos_. A file that cannot be
    // opened is tolerated silently and leaves the vector unchanged; empty
    // lines are logged and skipped; a line that does not split into exactly
    // IDF_COLUMN_NUM fields trips XCHECK.
    void LoadDefaultIdf(const string& filePath) {
        ifstream ifs(filePath.c_str());
        if (not ifs.is_open()) {
            return;
        }

        string line;
        vector<string> buf;
        size_t lineno = 0;

        for (; getline(ifs, line); lineno++) {
            if (line.empty()) {
                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
                continue;
            }

            Split(line, buf, " ");
            XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;

            IdfElement node_info;
            node_info.word = buf[0];
            node_info.idf = atof(buf[1].c_str());
            static_node_infos_.push_back(node_info);
        }
    }

    // Sum of the idf field over all entries.
    double CalcIdfSum(const vector<IdfElement>& node_infos) const {
        double sum = 0.0;

        for (size_t i = 0; i < node_infos.size(); i++) {
            sum += node_infos[i].idf;
        }

        return sum;
    }

    // Mean idf of node_infos, or 0.0 for an empty list. The guard avoids the
    // 0/0 = NaN that an unreadable dictionary would otherwise produce in builds
    // where the asserts in Init are compiled out.
    double AverageIdf(const vector<IdfElement>& node_infos) const {
        if (node_infos.empty()) {
            return 0.0;
        }

        return CalcIdfSum(node_infos) / node_infos.size();
    }

public:
    // Mean idf of the dictionary; 0.0 until Init has run or when no entry could be loaded.
    double idfAverage_ = 0.0;

private:
    vector<IdfElement> static_node_infos_;   // parse buffer, emptied again at the end of Init
    size_t total_dict_size_ = 0;
    DatTrie dat_;
    unordered_set<Rune> user_dict_single_chinese_word_;
};
}

View File

@ -1,24 +1,6 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEAB_JIEBA_H
#define CPPJIEAB_JIEBA_H
#include <memory>
#include "QuerySegment.hpp" #include "QuerySegment.hpp"
#include "KeywordExtractor.hpp" #include "KeywordExtractor.hpp"
@ -29,56 +11,48 @@ public:
Jieba(const string& dict_path, Jieba(const string& dict_path,
const string& model_path, const string& model_path,
const string& user_dict_path, const string& user_dict_path,
const string& idfPath, const string& idfPath = "",
const string& stopWordPath) const string& stopWordPath = "",
: dict_trie_(dict_path, user_dict_path), const string& dat_cache_path = "")
: dict_trie_(dict_path, user_dict_path, dat_cache_path),
model_(model_path), model_(model_path),
mp_seg_(&dict_trie_), mp_seg_(&dict_trie_),
hmm_seg_(&model_), hmm_seg_(&model_),
mix_seg_(&dict_trie_, &model_), mix_seg_(&dict_trie_, &model_, stopWordPath),
full_seg_(&dict_trie_), full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_), query_seg_(&dict_trie_, &model_, stopWordPath),
extractor(&dict_trie_, &model_, idfPath, stopWordPath) { extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
~Jieba() { }
}
~Jieba() {
}
struct LocWord {
string word;
size_t begin;
size_t end;
}; // struct LocWord
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const { void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm); mix_seg_.CutToStr(sentence, words, hmm);
} }
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const { void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm); mix_seg_.CutToWord(sentence, words, hmm);
} }
void CutAll(const string& sentence, vector<string>& words) const { void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.Cut(sentence, words); full_seg_.CutToStr(sentence, words);
} }
void CutAll(const string& sentence, vector<Word>& words) const { void CutAll(const string& sentence, vector<Word>& words) const {
full_seg_.Cut(sentence, words); full_seg_.CutToWord(sentence, words);
} }
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const { void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm); query_seg_.CutToStr(sentence, words, hmm);
} }
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const { void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm); query_seg_.CutToWord(sentence, words, hmm);
} }
void CutHMM(const string& sentence, vector<string>& words) const { void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.Cut(sentence, words); hmm_seg_.CutToStr(sentence, words);
} }
void CutHMM(const string& sentence, vector<Word>& words) const { void CutHMM(const string& sentence, vector<Word>& words) const {
hmm_seg_.Cut(sentence, words); hmm_seg_.CutToWord(sentence, words);
} }
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const { void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len); mp_seg_.CutToStr(sentence, words, false, max_word_len);
} }
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const { void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len); mp_seg_.CutToWord(sentence, words, false, max_word_len);
} }
void Tag(const string& sentence, vector<pair<string, string> >& words) const { void Tag(const string& sentence, vector<pair<string, string> >& words) const {
@ -87,16 +61,8 @@ public:
string LookupTag(const string &str) const { string LookupTag(const string &str) const {
return mix_seg_.LookupTag(str); return mix_seg_.LookupTag(str);
} }
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, tag);
}
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, freq, tag);
}
bool Find(const string& word) { bool Find(const string& word) {
return dict_trie_.Find(word); return nullptr != dict_trie_.Find(word);
} }
void ResetSeparators(const string& s) { void ResetSeparators(const string& s) {
@ -116,18 +82,6 @@ public:
return &model_; return &model_;
} }
void LoadUserDict(const vector<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
void LoadUserDict(const set<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
void LoadUserDict(const string& path) {
dict_trie_.LoadUserDict(path);
}
private: private:
DictTrie dict_trie_; DictTrie dict_trie_;
HMMModel model_; HMMModel model_;
@ -145,4 +99,3 @@ public:
} // namespace cppjieba } // namespace cppjieba
#endif // CPPJIEAB_JIEBA_H

View File

@ -1,27 +1,8 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
#include <cmath> #include <cmath>
#include <set>
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include "IdfTrie.hpp"
namespace cppjieba { namespace cppjieba {
@ -31,141 +12,87 @@ using namespace std;
/*utf8*/ /*utf8*/
class KeywordExtractor { class KeywordExtractor {
public: public:
struct Word {
string word;
vector<size_t> offsets;
double weight;
}; // struct Word
KeywordExtractor(const string& dictPath,
const string& hmmFilePath,
const string& idfPath,
const string& stopWordPath,
const string& userDict = "")
: segment_(dictPath, hmmFilePath, userDict) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
KeywordExtractor(const DictTrie* dictTrie, KeywordExtractor(const DictTrie* dictTrie,
const HMMModel* model, const HMMModel* model,
const string& idfPath, const string& idfPath,
const string& dat_cache_path,
const string& stopWordPath) const string& stopWordPath)
: segment_(dictTrie, model) { : segment_(dictTrie, model, stopWordPath),
LoadIdfDict(idfPath); idf_trie_(idfPath,dat_cache_path){
LoadStopWordDict(stopWordPath);
} }
~KeywordExtractor() { ~KeywordExtractor() {
} }
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const { void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords; vector<KeyWord> topWords;
Extract(sentence, topWords, topN); Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word); keywords.push_back(topWords[i].word);
} }
} }
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const { void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords; vector<KeyWord> topWords;
Extract(sentence, topWords, topN); Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight)); keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
} }
} }
void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const { void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
vector<string> words;
segment_.Cut(sentence, words);
map<string, Word> wordmap; unordered_map<string, KeyWord> wordmap;//插入字符串与Word的map相同string统计词频叠加权重
size_t offset = 0; PreFilter pre_filter(symbols_, sentence);
for(size_t i = 0; i < words.size(); ++i) { RuneStrArray::const_iterator null_p;
size_t t = offset; WordRange range(null_p, null_p);
offset += words[i].size(); bool isNull(false);
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) { while (pre_filter.Next(range, isNull)) {
if (isNull) {
continue; continue;
} }
wordmap[words[i]].offsets.push_back(t); segment_.CutToStr(sentence, range, wordmap);
wordmap[words[i]].weight += 1.0;
}
if(offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
} }
keywords.clear(); keywords.clear();
keywords.reserve(wordmap.size()); keywords.reserve(wordmap.size());
for(map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first); for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
if(cit != idfMap_.end()) { double idf = idf_trie_.Find(itr->first);
itr->second.weight *= cit->second; if (-1 != idf) {//IDF词典查找
itr->second.weight *= idf;
} else { } else {
itr->second.weight *= idfAverage_; itr->second.weight *= idf_trie_.idfAverage_;
} }
itr->second.word = itr->first; itr->second.word = itr->first;
keywords.push_back(itr->second); keywords.push_back(itr->second);
} }
topN = min(topN, keywords.size()); topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN); keywords.resize(topN);
} }
private: private:
void LoadIdfDict(const string& idfPath) {
ifstream ifs(idfPath.c_str());
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
string line ;
vector<string> buf;
double idf = 0.0;
double idfSum = 0.0;
size_t lineno = 0;
for(; getline(ifs, line); lineno++) {
buf.clear();
if(line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
if(buf.size() != 2) {
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
continue;
}
idf = atof(buf[1].c_str());
idfMap_[buf[0]] = idf;
idfSum += idf;
} static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
assert(lineno);
idfAverage_ = idfSum / lineno;
assert(idfAverage_ > 0.0);
}
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while(getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
static bool Compare(const Word& lhs, const Word& rhs) {
return lhs.weight > rhs.weight; return lhs.weight > rhs.weight;
} }
MixSegment segment_; MixSegment segment_;
unordered_map<string, double> idfMap_; IdfTrie idf_trie_;
double idfAverage_;
unordered_set<string> stopWords_; unordered_set<Rune> symbols_;
}; // class KeywordExtractor }; // class KeywordExtractor
inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) { inline ostream& operator << (ostream& os, const KeyWord& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}"; return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
"}";
} }
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_MPSEGMENT_H
#define CPPJIEBA_MPSEGMENT_H
#include <algorithm> #include <algorithm>
#include <set> #include <set>
@ -31,63 +12,36 @@ namespace cppjieba {
class MPSegment: public SegmentTagged { class MPSegment: public SegmentTagged {
public: public:
MPSegment(const string& dictPath, const string& userDictPath = "")
: dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
}
MPSegment(const DictTrie* dictTrie) MPSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) { : dictTrie_(dictTrie) {
assert(dictTrie_); assert(dictTrie_);
} }
~MPSegment() { ~MPSegment() { }
if(isNeedDestroy_) {
delete dictTrie_; virtual void Cut(RuneStrArray::const_iterator begin,
} RuneStrArray::const_iterator end,
vector<WordRange>& words,
bool, size_t max_word_len) const override {
// vector<DatDag> dags;
// dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
// CalcDP(dags);//动态规划Dynamic ProgrammingDP根据DAG计算最优动态规划路径--jxx
// CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
dictTrie_->Find(begin, end, words, max_word_len);
} }
void Cut(const string& sentence, vector<string>& words) const { virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
Cut(sentence, words, MAX_WORD_LENGTH); size_t) const override {
}
void Cut(const string& sentence,
vector<string>& words,
size_t max_word_len) const {
vector<Word> tmp;
Cut(sentence, tmp, max_word_len);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, max_word_len);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
vector<Dag> dags;
dictTrie_->Find(begin,
end,
dags,
max_word_len);
CalcDP(dags);
CutByDag(begin, end, dags, words);
} }
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
const DictTrie* GetDictTrie() const { }
const DictTrie* GetDictTrie() const override {
return dictTrie_; return dictTrie_;
} }
bool Tag(const string& src, vector<pair<string, string> >& res) const { bool Tag(const string& src, vector<pair<string, string> >& res) const override {
return tagger_.Tag(src, res, *this); return tagger_.Tag(src, res, *this);
} }
@ -95,61 +49,81 @@ public:
return dictTrie_->IsUserDictSingleChineseWord(value); return dictTrie_->IsUserDictSingleChineseWord(value);
} }
private: private:
void CalcDP(vector<Dag>& dags) const { /*
size_t nextPos; void CalcDP(vector<DatDag>& dags) const {
const DictUnit* p; double val(0);
double val; for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
rit->max_next = -1;
rit->max_weight = MIN_DOUBLE;
for(vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) { for (const auto & it : rit->nexts) {
rit->pInfo = NULL; const auto nextPos = it.first;
rit->weight = MIN_DOUBLE; val = dictTrie_->GetMinWeight();
assert(!rit->nexts.empty());
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) { if (nullptr != it.second) {
nextPos = it->first; val = it.second->weight;
p = it->second;
val = 0.0;
if(nextPos + 1 < dags.size()) {
val += dags[nextPos + 1].weight;
} }
if(p) { if (nextPos < dags.size()) {
val += p->weight; val += dags[nextPos].max_weight;
} else {
val += dictTrie_->GetMinWeight();
} }
if(val > rit->weight) {
rit->pInfo = p; if ((nextPos <= dags.size()) && (val > rit->max_weight)) {
rit->weight = val; rit->max_weight = val;
rit->max_next = nextPos;
} }
} }
} }
} }
void CutByDag(RuneStrArray::const_iterator begin, */
RuneStrArray::const_iterator end, /* 倒叙方式重写CalcDP函数初步测试未发现问题*/
const vector<Dag>& dags, void CalcDP(vector<DatDag>& dags) const {
vector<WordRange>& words) const { double val(0);
size_t i = 0; size_t size = dags.size();
while(i < dags.size()) {
const DictUnit* p = dags[i].pInfo; for (size_t i = 0; i < size; i++) {
if(p) { dags[size - 1 - i].max_next = -1;
assert(p->word.size() >= 1); dags[size - 1 - i].max_weight = MIN_DOUBLE;
WordRange wr(begin + i, begin + i + p->word.size() - 1);
words.push_back(wr); for (const auto & it : dags[size - 1 - i].nexts) {
i += p->word.size(); const auto nextPos = it.first;
} else { //single chinese word val = dictTrie_->GetMinWeight();
WordRange wr(begin + i, begin + i);
words.push_back(wr); if (nullptr != it.second) {
i++; val = it.second->weight;
}
if (nextPos < dags.size()) {
val += dags[nextPos].max_weight;
}
if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) {
dags[size - 1 - i].max_weight = val;
dags[size - 1 - i].max_next = nextPos;
}
} }
} }
} }
void CutByDag(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator,
const vector<DatDag>& dags,
vector<WordRange>& words) const {
for (size_t i = 0; i < dags.size();) {
const auto next = dags[i].max_next;
assert(next > i);
assert(next <= dags.size());
WordRange wr(begin + i, begin + next - 1);
words.push_back(wr);
i = next;
}
}
const DictTrie* dictTrie_; const DictTrie* dictTrie_;
bool isNeedDestroy_;
PosTagger tagger_; PosTagger tagger_;
}; // class MPSegment }; // class MPSegment
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_MIXSEGMENT_H
#define CPPJIEBA_MIXSEGMENT_H
#include <cassert> #include <cassert>
#include "MPSegment.hpp" #include "MPSegment.hpp"
@ -28,70 +9,52 @@
namespace cppjieba { namespace cppjieba {
class MixSegment: public SegmentTagged { class MixSegment: public SegmentTagged {
public: public:
MixSegment(const string& mpSegDict, const string& hmmSegDict, MixSegment(const DictTrie* dictTrie,
const string& userDict = "") const HMMModel* model,
: mpSeg_(mpSegDict, userDict), const string& stopWordPath)
hmmSeg_(hmmSegDict) {
}
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
: mpSeg_(dictTrie), hmmSeg_(model) { : mpSeg_(dictTrie), hmmSeg_(model) {
LoadStopWordDict(stopWordPath);
} }
~MixSegment() { ~MixSegment() {}
}
void Cut(const string& sentence, vector<string>& words) const { virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
Cut(sentence, words, true); size_t) const override {
} if (!hmm) {
void Cut(const string& sentence, vector<string>& words, bool hmm) const { mpSeg_.CutRuneArray(begin, end, res);
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
if(!hmm) {
mpSeg_.Cut(begin, end, res);
return; return;
} }
vector<WordRange> words; vector<WordRange> words;
assert(end >= begin); assert(end >= begin);
words.reserve(end - begin); words.reserve(end - begin);
mpSeg_.Cut(begin, end, words); mpSeg_.CutRuneArray(begin, end, words);
vector<WordRange> hmmRes; vector<WordRange> hmmRes;
hmmRes.reserve(end - begin); hmmRes.reserve(end - begin);
for(size_t i = 0; i < words.size(); i++) {
for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result //if mp Get a word, it's ok, put it into result
if(words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) { if (words[i].left != words[i].right || (words[i].left == words[i].right &&
mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
res.push_back(words[i]); res.push_back(words[i]);
continue; continue;
} }
// if mp Get a single one and it is not in userdict, collect it in sequence // if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i; size_t j = i;
while(j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
while (j < words.size() && words[j].left == words[j].right &&
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++; j++;
} }
// Cut the sequence with hmm // Cut the sequence with hmm
assert(j - 1 >= i); assert(j - 1 >= i);
// TODO // TODO
hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes); hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result //put hmm result to result
for(size_t k = 0; k < hmmRes.size(); k++) { for (size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(hmmRes[k]); res.push_back(hmmRes[k]);
} }
@ -103,11 +66,141 @@ public:
} }
} }
const DictTrie* GetDictTrie() const { virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
//目前hmm默认开启后期如有需要关闭再修改--jxx20210519
// if (!hmm) {
// mpSeg_.CutRuneArray(begin, end, res);
// return;
// }
vector<WordRange> words;
assert(end >= begin);
words.reserve(end - begin);
mpSeg_.CutRuneArray(begin, end, words);
vector<WordRange> hmmRes;
hmmRes.reserve(end - begin);
for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result
if (words[i].left != words[i].right) {
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
continue;
}
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
continue;
}
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里i字符不是最后一个字符直接判定j字符
while (j < (words.size() - 1) && words[j].left == words[j].right &&
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
vector<WordRange> words;
vector<WordRange> hmmRes;
assert(end >= begin);
if (3 == begin->len or 4 == begin->len) {
words.reserve(end - begin);
mpSeg_.CutRuneArray(begin, end, words);
hmmRes.reserve(words.size());
} else {
hmmRes.reserve(end - begin);
}
if (words.size() != 0) {//存在中文分词结果
for (size_t i = 0; i < words.size(); i++) {
string str = GetStringFromRunes(s, words[i].left, words[i].right);
if (stopWords_.find(str) != stopWords_.end()) {
continue;
}
if (words[i].left != words[i].right) {
res[str].offsets.push_back(words[i].left->offset);
res[str].weight += 1.0;
continue;
}
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
if (stopWords_.find(str) != stopWords_.end()) {
continue;
}
res[str].offsets.push_back(words[i].left->offset);
res[str].weight += 1.0;
continue;
}
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里i字符不是最后一个字符直接判定j字符
while (j < (words.size() - 1)
&& words[j].left == words[j].right
&& !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
continue;
}
res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
res[hmmStr].weight += 1.0;
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
} else {//不存在中文分词结果
for (size_t i = 0; i < (size_t)(end - begin); i++) {
string str = s.substr((begin+i)->offset, (begin+i)->len);
res[str].offsets.push_back((begin+i)->offset);
res[str].weight += 1.0;
}
}
}
const DictTrie* GetDictTrie() const override {
return mpSeg_.GetDictTrie(); return mpSeg_.GetDictTrie();
} }
bool Tag(const string& src, vector<pair<string, string> >& res) const { bool Tag(const string& src, vector<pair<string, string> >& res) const override {
return tagger_.Tag(src, res, *this); return tagger_.Tag(src, res, *this);
} }
@ -115,7 +208,23 @@ public:
return tagger_.LookupTag(str, *this); return tagger_.LookupTag(str, *this);
} }
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
if(not ifs.is_open()){
return ;
}
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while (getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
private: private:
unordered_set<string> stopWords_;
MPSegment mpSeg_; MPSegment mpSeg_;
HMMSegment hmmSeg_; HMMSegment hmmSeg_;
PosTagger tagger_; PosTagger tagger_;
@ -124,4 +233,3 @@ private:
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,27 +1,8 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_POS_TAGGING_H
#define CPPJIEBA_POS_TAGGING_H
#include "limonp/StringUtil.hpp" #include "limonp/StringUtil.hpp"
#include "SegmentTagged.hpp"
#include "DictTrie.hpp" #include "DictTrie.hpp"
#include "SegmentTagged.hpp"
namespace cppjieba { namespace cppjieba {
using namespace limonp; using namespace limonp;
@ -39,28 +20,31 @@ public:
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const { bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
vector<string> CutRes; vector<string> CutRes;
segment.Cut(src, CutRes); segment.CutToStr(src, CutRes);
for(vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) { for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
res.push_back(make_pair(*itr, LookupTag(*itr, segment))); res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
} }
return !res.empty(); return !res.empty();
} }
string LookupTag(const string &str, const SegmentTagged& segment) const { string LookupTag(const string &str, const SegmentTagged& segment) const {
const DictUnit *tmp = NULL;
RuneStrArray runes;
const DictTrie * dict = segment.GetDictTrie(); const DictTrie * dict = segment.GetDictTrie();
assert(dict != NULL); assert(dict != NULL);
if(!DecodeRunesInString(str, runes)) { const auto tmp = dict->Find(str);
XLOG(ERROR) << "Decode failed.";
return POS_X; if (tmp == NULL || tmp->GetTag().empty()) {
} RuneStrArray runes;
tmp = dict->Find(runes.begin(), runes.end());
if(tmp == NULL || tmp->tag.empty()) { if (!DecodeRunesInString(str, runes)) {
XLOG(ERROR) << "Decode failed.";
return POS_X;
}
return SpecialRule(runes); return SpecialRule(runes);
} else { } else {
return tmp->tag; return tmp->GetTag();
} }
} }
@ -68,22 +52,27 @@ private:
const char* SpecialRule(const RuneStrArray& unicode) const { const char* SpecialRule(const RuneStrArray& unicode) const {
size_t m = 0; size_t m = 0;
size_t eng = 0; size_t eng = 0;
for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
if(unicode[i].rune < 0x80) { for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
if (unicode[i].rune < 0x80) {
eng ++; eng ++;
if('0' <= unicode[i].rune && unicode[i].rune <= '9') {
if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
m++; m++;
} }
} }
} }
// ascii char is not found // ascii char is not found
if(eng == 0) { if (eng == 0) {
return POS_X; return POS_X;
} }
// all the ascii is number char // all the ascii is number char
if(m == eng) { if (m == eng) {
return POS_M; return POS_M;
} }
// the ascii chars contain english letter // the ascii chars contain english letter
return POS_ENG; return POS_ENG;
} }
@ -92,4 +81,3 @@ private:
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,43 +1,20 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_PRE_FILTER_H
#define CPPJIEBA_PRE_FILTER_H
#include "Trie.hpp"
#include "limonp/Logging.hpp" #include "limonp/Logging.hpp"
#include <unordered_set>
#include "Unicode.hpp"
namespace cppjieba { namespace cppjieba {
class PreFilter { class PreFilter {
public: public:
//TODO use WordRange instead of Range PreFilter(const std::unordered_set<Rune>& symbols,
struct Range {
RuneStrArray::const_iterator begin;
RuneStrArray::const_iterator end;
}; // struct Range
PreFilter(const unordered_set<Rune>& symbols,
const string& sentence) const string& sentence)
: symbols_(symbols) { : symbols_(symbols) {
if(!DecodeRunesInString(sentence, sentence_)) { if (!DecodeRunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. "; XLOG(ERROR) << "decode failed. "<<sentence;
} }
cursor_ = sentence_.begin(); cursor_ = sentence_.begin();
} }
~PreFilter() { ~PreFilter() {
@ -45,28 +22,105 @@ public:
bool HasNext() const { bool HasNext() const {
return cursor_ != sentence_.end(); return cursor_ != sentence_.end();
} }
Range Next() { bool Next(WordRange& wordRange) {
Range range;
range.begin = cursor_; if (cursor_ == sentence_.end()) {
while(cursor_ != sentence_.end()) { return false;
if(IsIn(symbols_, cursor_->rune)) { }
if(range.begin == cursor_) {
wordRange.left = cursor_;
while (cursor_->rune == 0x20 && cursor_ != sentence_.end()) {
cursor_++;
}
if (cursor_ == sentence_.end()) {
wordRange.right = cursor_;
return true;
}
while (++cursor_ != sentence_.end()) {
if (cursor_->rune == 0x20) {
wordRange.right = cursor_;
return true;
}
}
wordRange.right = sentence_.end();
return true;
}
bool Next(WordRange& wordRange, bool& isNull) {
isNull = false;
if (cursor_ == sentence_.end()) {
return false;
}
wordRange.left = cursor_;
if (cursor_->rune == 0x20) {
while (cursor_ != sentence_.end()) {
if (cursor_->rune != 0x20) {
if (wordRange.left == cursor_) {
cursor_ ++;
}
wordRange.right = cursor_;
isNull = true;
return true;
}
cursor_ ++;
}
}
int max_num = 0;
uint32_t utf8_num = cursor_->len;
while (cursor_ != sentence_.end()) {
if (cursor_->rune == 0x20) {
if (wordRange.left == cursor_) {
cursor_ ++; cursor_ ++;
} }
range.end = cursor_;
wordRange.right = cursor_;
return true;
}
cursor_ ++;
max_num++;
if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节暂定限制为1024个字
wordRange.right = cursor_;
return true;
}
}
wordRange.right = sentence_.end();
return true;
}
WordRange Next() {
WordRange range(cursor_, cursor_);
while (cursor_ != sentence_.end()) {
//if (IsIn(symbols_, cursor_->rune)) {
if (cursor_->rune == 0x20) {
if (range.left == cursor_) {
cursor_ ++;
}
range.right = cursor_;
return range; return range;
} }
cursor_ ++; cursor_ ++;
} }
range.end = sentence_.end();
range.right = sentence_.end();
return range; return range;
} }
private: private:
RuneStrArray::const_iterator cursor_; RuneStrArray::const_iterator cursor_;
RuneStrArray sentence_; RuneStrArray sentence_;
const unordered_set<Rune>& symbols_; const std::unordered_set<Rune>& symbols_;
}; // class PreFilter }; // class PreFilter
} // namespace cppjieba } // namespace cppjieba
#endif // CPPJIEBA_PRE_FILTER_H

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_QUERYSEGMENT_H
#define CPPJIEBA_QUERYSEGMENT_H
#include <algorithm> #include <algorithm>
#include <set> #include <set>
@ -28,74 +9,70 @@
#include "FullSegment.hpp" #include "FullSegment.hpp"
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include "Unicode.hpp" #include "Unicode.hpp"
#include "DictTrie.hpp"
namespace cppjieba { namespace cppjieba {
class QuerySegment: public SegmentBase { class QuerySegment: public SegmentBase {
public: public:
QuerySegment(const string& dict, const string& model, const string& userDict = "") QuerySegment(const DictTrie* dictTrie,
: mixSeg_(dict, model, userDict), const HMMModel* model,
trie_(mixSeg_.GetDictTrie()) { const string& stopWordPath)
} : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
: mixSeg_(dictTrie, model), trie_(dictTrie) {
} }
~QuerySegment() { ~QuerySegment() {
} }
void Cut(const string& sentence, vector<string>& words) const { virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
Cut(sentence, words, true); size_t) const override {
}
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
//use mix Cut first //use mix Cut first
vector<WordRange> mixRes; vector<WordRange> mixRes;
mixSeg_.Cut(begin, end, mixRes, hmm); mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
vector<WordRange> fullRes; vector<WordRange> fullRes;
for(vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
if(mixResItr->Length() > 2) { for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
for(size_t i = 0; i + 1 < mixResItr->Length(); i++) { if (mixResItr->Length() > 2) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1); for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
if(trie_->Find(wr.left, wr.right + 1) != NULL) { string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
if (trie_->Find(text) != NULL) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
res.push_back(wr); res.push_back(wr);
} }
} }
} }
if(mixResItr->Length() > 3) {
for(size_t i = 0; i + 2 < mixResItr->Length(); i++) { if (mixResItr->Length() > 3) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2); for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
if(trie_->Find(wr.left, wr.right + 1) != NULL) { string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
if (trie_->Find(text) != NULL) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
res.push_back(wr); res.push_back(wr);
} }
} }
} }
res.push_back(*mixResItr); res.push_back(*mixResItr);
} }
} }
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private: private:
bool IsAllAscii(const Unicode& s) const { bool IsAllAscii(const RuneArray& s) const {
for(size_t i = 0; i < s.size(); i++) { for (size_t i = 0; i < s.size(); i++) {
if(s[i] >= 0x80) { if (s[i] >= 0x80) {
return false; return false;
} }
} }
return true; return true;
} }
MixSegment mixSeg_; MixSegment mixSeg_;
@ -104,4 +81,3 @@ private:
} // namespace cppjieba } // namespace cppjieba
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_SEGMENTBASE_H
#define CPPJIEBA_SEGMENTBASE_H
#include "limonp/Logging.hpp" #include "limonp/Logging.hpp"
#include "PreFilter.hpp" #include "PreFilter.hpp"
@ -35,24 +16,74 @@ public:
SegmentBase() { SegmentBase() {
XCHECK(ResetSeparators(SPECIAL_SEPARATORS)); XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
} }
virtual ~SegmentBase() { virtual ~SegmentBase() { }
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
size_t max_word_len) const = 0;
//添加基于sentence的cut方法减少中间变量的存储与格式转换--jxx20210517
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t max_word_len) const = 0;
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t max_word_len) const = 0;
//重写CutToStr函数简化获取vector<string>& words的流程降低内存占用--jxx20210517
void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
words.clear();
words.reserve(sentence.size() / 2);//todo 参考源码,参数待定
RuneStrArray::const_iterator null_p;
WordRange range(null_p, null_p);
while (pre_filter.Next(range)) {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
}
void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while (pre_filter.HasNext()) {
auto range = pre_filter.Next();
Cut(range.left, range.right, wrs, hmm, max_word_len);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
wrs.clear();
vector<WordRange>().swap(wrs);
} }
virtual void Cut(const string& sentence, vector<string>& words) const = 0; void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
Cut(begin, end, res, hmm, max_word_len);
}
bool ResetSeparators(const string& s) { bool ResetSeparators(const string& s) {
symbols_.clear(); symbols_.clear();
RuneStrArray runes; RuneStrArray runes;
if(!DecodeRunesInString(s, runes)) {
if (!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed"; XLOG(ERROR) << "decode " << s << " failed";
return false; return false;
} }
for(size_t i = 0; i < runes.size(); i++) {
if(!symbols_.insert(runes[i].rune).second) { for (size_t i = 0; i < runes.size(); i++) {
if (!symbols_.insert(runes[i].rune).second) {
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists"; XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
return false; return false;
} }
} }
return true; return true;
} }
protected: protected:
@ -61,4 +92,3 @@ protected:
} // cppjieba } // cppjieba
#endif

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_SEGMENTTAGGED_H
#define CPPJIEBA_SEGMENTTAGGED_H
#include "SegmentBase.hpp" #include "SegmentBase.hpp"
@ -38,4 +19,3 @@ public:
} // cppjieba } // cppjieba
#endif

View File

@ -1,212 +1,205 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd. #include <cmath>
* #include "Jieba.hpp"
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by namespace cppjieba {
* the Free Software Foundation, either version 3 of the License, or using namespace limonp;
* (at your option) any later version. using namespace std;
*
* This program is distributed in the hope that it will be useful, class TextRankExtractor {
* but WITHOUT ANY WARRANTY; without even the implied warranty of public:
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the typedef struct _Word {
* GNU General Public License for more details. string word;
* vector<size_t> offsets;
* You should have received a copy of the GNU General Public License double weight;
* along with this program. If not, see <https://www.gnu.org/licenses/>. } Word; // struct Word
* private:
* typedef std::map<string, Word> WordMap;
*/
#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H class WordGraph {
#define CPPJIEBA_TEXTRANK_EXTRACTOR_H private:
typedef double Score;
#include <cmath> typedef string Node;
#include "Jieba.hpp" typedef std::set<Node> NodeSet;
namespace cppjieba { typedef std::map<Node, double> Edges;
using namespace limonp; typedef std::map<Node, Edges> Graph;
using namespace std; //typedef std::unordered_map<Node,double> Edges;
//typedef std::unordered_map<Node,Edges> Graph;
class TextRankExtractor {
public: double d;
typedef struct _Word { Graph graph;
string word; NodeSet nodeSet;
vector<size_t> offsets; public:
double weight; WordGraph(): d(0.85) {};
} Word; // struct Word WordGraph(double in_d): d(in_d) {};
private:
typedef std::map<string, Word> WordMap; void addEdge(Node start, Node end, double weight) {
Edges temp;
class WordGraph { Edges::iterator gotEdges;
private: nodeSet.insert(start);
typedef double Score; nodeSet.insert(end);
typedef string Node; graph[start][end] += weight;
typedef std::set<Node> NodeSet; graph[end][start] += weight;
}
typedef std::map<Node, double> Edges;
typedef std::map<Node, Edges> Graph; void rank(WordMap &ws, size_t rankTime = 10) {
//typedef std::unordered_map<Node,double> Edges; WordMap outSum;
//typedef std::unordered_map<Node,Edges> Graph; Score wsdef, min_rank, max_rank;
double d; if (graph.size() == 0) {
Graph graph; return;
NodeSet nodeSet; }
public:
WordGraph(): d(0.85) {}; wsdef = 1.0 / graph.size();
WordGraph(double in_d): d(in_d) {};
for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
void addEdge(Node start, Node end, double weight) { // edges->first start节点edge->first end节点edge->second 权重
Edges temp; ws[edges->first].word = edges->first;
Edges::iterator gotEdges; ws[edges->first].weight = wsdef;
nodeSet.insert(start); outSum[edges->first].weight = 0;
nodeSet.insert(end);
graph[start][end] += weight; for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
graph[end][start] += weight; outSum[edges->first].weight += edge->second;
} }
}
void rank(WordMap &ws, size_t rankTime = 10) {
WordMap outSum; //sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
Score wsdef, min_rank, max_rank; for (size_t i = 0; i < rankTime; i++) {
for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
if(graph.size() == 0) double s = 0;
return;
for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
wsdef = 1.0 / graph.size(); // edge->first end节点edge->second 权重
{
for(Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) { s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
// edges->first start节点edge->first end节点edge->second 权重 }
ws[edges->first].word = edges->first;
ws[edges->first].weight = wsdef; ws[*node].weight = (1 - d) + d * s;
outSum[edges->first].weight = 0; }
for(Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) { }
outSum[edges->first].weight += edge->second;
} min_rank = max_rank = ws.begin()->second.weight;
}
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序? for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
for(size_t i = 0; i < rankTime; i++) { if (i->second.weight < min_rank) {
for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) { min_rank = i->second.weight;
double s = 0; }
for(Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
// edge->first end节点edge->second 权重 if (i->second.weight > max_rank) {
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight; max_rank = i->second.weight;
ws[*node].weight = (1 - d) + d * s; }
} }
}
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
min_rank = max_rank = ws.begin()->second.weight; ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { }
if(i->second.weight < min_rank) { }
min_rank = i->second.weight; };
}
if(i->second.weight > max_rank) { public:
max_rank = i->second.weight; TextRankExtractor(const DictTrie* dictTrie,
} const HMMModel* model,
} const string& stopWordPath)
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { : segment_(dictTrie, model) {
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0); LoadStopWordDict(stopWordPath);
} }
} TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
}; LoadStopWordDict(stopWordPath);
}
public: ~TextRankExtractor() {
TextRankExtractor(const string& dictPath, }
const string& hmmFilePath,
const string& stopWordPath, void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
const string& userDict = "") vector<Word> topWords;
: segment_(dictPath, hmmFilePath, userDict) { Extract(sentence, topWords, topN);
LoadStopWordDict(stopWordPath);
} for (size_t i = 0; i < topWords.size(); i++) {
TextRankExtractor(const DictTrie* dictTrie, keywords.push_back(topWords[i].word);
const HMMModel* model, }
const string& stopWordPath) }
: segment_(dictTrie, model) {
LoadStopWordDict(stopWordPath); void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
} vector<Word> topWords;
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) { Extract(sentence, topWords, topN);
LoadStopWordDict(stopWordPath);
} for (size_t i = 0; i < topWords.size(); i++) {
~TextRankExtractor() { keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
} }
}
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords; void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
Extract(sentence, topWords, topN); vector<string> words;
for(size_t i = 0; i < topWords.size(); i++) { segment_.CutToStr(sentence, words);
keywords.push_back(topWords[i].word);
} TextRankExtractor::WordGraph graph;
} WordMap wordmap;
size_t offset = 0;
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords; for (size_t i = 0; i < words.size(); i++) {
Extract(sentence, topWords, topN); size_t t = offset;
for(size_t i = 0; i < topWords.size(); i++) { offset += words[i].size();
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
} if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
} continue;
}
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
vector<string> words; for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
segment_.Cut(sentence, words); if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
skip++;
TextRankExtractor::WordGraph graph; continue;
WordMap wordmap; }
size_t offset = 0;
graph.addEdge(words[i], words[j], 1);
for(size_t i = 0; i < words.size(); i++) { }
size_t t = offset;
offset += words[i].size(); wordmap[words[i]].offsets.push_back(t);
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) { }
continue;
} if (offset != sentence.size()) {
for(size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) { XLOG(ERROR) << "words illegal";
if(IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) { return;
skip++; }
continue;
} graph.rank(wordmap, rankTime);
graph.addEdge(words[i], words[j], 1);
} keywords.clear();
wordmap[words[i]].offsets.push_back(t); keywords.reserve(wordmap.size());
}
if(offset != sentence.size()) { for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
XLOG(ERROR) << "words illegal"; keywords.push_back(itr->second);
return; }
}
topN = min(topN, keywords.size());
graph.rank(wordmap, rankTime); partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
keywords.clear(); }
keywords.reserve(wordmap.size()); private:
for(WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { void LoadStopWordDict(const string& filePath) {
keywords.push_back(itr->second); ifstream ifs(filePath.c_str());
} XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); while (getline(ifs, line)) {
keywords.resize(topN); stopWords_.insert(line);
} }
private:
void LoadStopWordDict(const string& filePath) { assert(stopWords_.size());
ifstream ifs(filePath.c_str()); }
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ; static bool Compare(const Word &x, const Word &y) {
while(getline(ifs, line)) { return x.weight > y.weight;
stopWords_.insert(line); }
}
assert(stopWords_.size()); MixSegment segment_;
} unordered_set<string> stopWords_;
}; // class TextRankExtractor
static bool Compare(const Word &x, const Word &y) {
return x.weight > y.weight; inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
} return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
"}";
MixSegment segment_; }
unordered_set<string> stopWords_; } // namespace cppjieba
}; // class TextRankExtractor
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
}
} // namespace cppjieba
#endif

View File

@ -1,192 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_TRIE_HPP
#define CPPJIEBA_TRIE_HPP
#include <vector>
#include <queue>
#include "limonp/StdExtension.hpp"
#include "Unicode.hpp"
namespace cppjieba {
using namespace std;
const size_t MAX_WORD_LENGTH = 512;
// One dictionary entry. Trie nodes hold non-owning `const DictUnit*` pointers
// to these records, so a DictUnit must outlive any Trie that references it.
struct DictUnit {
// The entry's key as a sequence of runes.
Unicode word;
// Numeric weight attached to the entry.
// NOTE(review): presumably a (log-)frequency score from the dictionary file - confirm against the loader.
double weight;
// Free-form label attached to the entry.
// NOTE(review): presumably the part-of-speech tag - confirm against the loader.
string tag;
}; // struct DictUnit
// for debugging
// inline ostream & operator << (ostream& os, const DictUnit& unit) {
// string s;
// s << unit.word;
// return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
// }
// Per-rune record produced by Trie::Find(begin, end, res, max_word_len):
// one Dag is emitted for every rune of the input, in input order.
struct Dag {
// The input rune this record was built for.
RuneStr runestr;
// Candidate words that start at this rune. Each pair is
// (index of the candidate's last rune, matching dictionary entry), i.e. the
// candidate spans [this record's index, nexts.first]. Trie::Find always puts
// the single-rune candidate first, and its entry may be NULL.
limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
// Not written by Trie::Find; starts out NULL.
// NOTE(review): presumably the entry chosen by a later segmentation pass - confirm.
const DictUnit * pInfo;
// Not written by Trie::Find; starts out 0.0.
// NOTE(review): presumably the best path score through this rune - confirm.
double weight;
// Not written by Trie::Find; starts out 0.
size_t nextPos; // TODO
// Creates an empty record: no candidates, NULL entry, zero weight/position.
Dag(): runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
}
}; // struct Dag
typedef Rune TrieKey;
// A single node of Trie. It has no destructor of its own: Trie allocates the
// nodes and child maps (InsertNode) and releases them (DeleteNode).
class TrieNode {
public :
// Creates a leaf with no children and no associated dictionary entry.
TrieNode(): next(NULL), ptValue(NULL) {
}
public:
// Child table type: rune -> child node.
typedef unordered_map<TrieKey, TrieNode*> NextMap;
// Children of this node; NULL until the first child is inserted.
NextMap *next;
// Dictionary entry for the key that ends at this node, or NULL when no key
// ends here. Non-owning.
const DictUnit *ptValue;
};
// Prefix tree keyed by runes that maps whole rune sequences to dictionary
// entries.
//
// The trie owns its TrieNode objects and their child maps (both are released
// by the destructor). It does NOT own the DictUnit objects it points to; those
// must outlive the trie.
//
// Because the node tree is owned through raw pointers, the class is
// deliberately non-copyable: an implicit member-wise copy would make two
// tries delete the same nodes.
class Trie {
public:
    // Builds a trie that maps keys[i] to valuePointers[i] for every i.
    // Nothing is inserted when either vector is empty; otherwise both vectors
    // are expected to have the same size (checked with assert).
    Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
        : root_(new TrieNode) {
        CreateTrie(keys, valuePointers);
    }

    // Releases every node and child map reachable from the root.
    ~Trie() {
        DeleteNode(root_);
    }

    // Exact lookup of the rune sequence [begin, end).
    // Returns the entry stored for that sequence, or NULL when the range is
    // empty, when the path does not exist, or when the path exists but no key
    // ends on it.
    const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
        if(begin == end) {
            return NULL;
        }
        const TrieNode* ptNode = root_;
        TrieNode::NextMap::const_iterator citer;
        for(RuneStrArray::const_iterator it = begin; it != end; ++it) {
            if(NULL == ptNode->next) {
                return NULL;
            }
            citer = ptNode->next->find(it->rune);
            if(ptNode->next->end() == citer) {
                return NULL;
            }
            ptNode = citer->second;
        }
        return ptNode->ptValue;
    }

    // Builds one Dag record per rune of [begin, end) into `res`.
    //
    // For input index i, res[i].nexts receives:
    //   - first, the pair (i, entry for the single rune at i, or NULL);
    //   - then a pair (j, entry) for every j > i such that runes i..j form a
    //     stored key, limited to keys of at most max_word_len runes.
    //
    // Any previous content of `res` is discarded.
    void Find(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<struct Dag>&res,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        assert(root_ != NULL);
        const size_t runeCount = size_t(end - begin);
        // resize() alone keeps elements that are already present, so a reused
        // vector would have new candidates appended to stale `nexts` lists.
        // Clearing first guarantees every record starts default-constructed.
        res.clear();
        res.resize(runeCount);
        const TrieNode *ptNode = NULL;
        TrieNode::NextMap::const_iterator citer;
        for(size_t i = 0; i < runeCount; i++) {
            res[i].runestr = *(begin + i);
            if(root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
                ptNode = citer->second;
            } else {
                ptNode = NULL;
            }
            // The single-rune candidate is always emitted, even without a
            // dictionary entry, so every record has at least one candidate.
            const DictUnit* singleRuneValue =
                (ptNode != NULL) ? ptNode->ptValue : static_cast<const DictUnit*>(NULL);
            res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, singleRuneValue));
            // Walk further down the trie to collect longer keys starting at i.
            for(size_t j = i + 1; j < runeCount && (j - i + 1) <= max_word_len; j++) {
                if(ptNode == NULL || ptNode->next == NULL) {
                    break;
                }
                citer = ptNode->next->find((begin + j)->rune);
                if(ptNode->next->end() == citer) {
                    break;
                }
                ptNode = citer->second;
                if(NULL != ptNode->ptValue) {
                    res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
                }
            }
        }
    }

    // Inserts `key` -> `ptValue`, creating intermediate nodes as needed.
    // An empty key is ignored. Inserting an existing key replaces its value.
    void InsertNode(const Unicode& key, const DictUnit* ptValue) {
        if(key.begin() == key.end()) {
            return;
        }
        TrieNode::NextMap::const_iterator kmIter;
        TrieNode *ptNode = root_;
        for(Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
            // Child maps are allocated lazily, on first insertion below a node.
            if(NULL == ptNode->next) {
                ptNode->next = new TrieNode::NextMap;
            }
            kmIter = ptNode->next->find(*citer);
            if(ptNode->next->end() == kmIter) {
                TrieNode *nextNode = new TrieNode;
                ptNode->next->insert(make_pair(*citer, nextNode));
                ptNode = nextNode;
            } else {
                ptNode = kmIter->second;
            }
        }
        assert(ptNode != NULL);
        ptNode->ptValue = ptValue;
    }

private:
    // Non-copyable: declared but intentionally never defined, so copying a
    // Trie fails at compile/link time instead of double-freeing the nodes.
    Trie(const Trie&);
    Trie& operator=(const Trie&);

    // Inserts every (keys[i], valuePointers[i]) pair; no-op if either is empty.
    void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
        if(valuePointers.empty() || keys.empty()) {
            return;
        }
        assert(keys.size() == valuePointers.size());
        for(size_t i = 0; i < keys.size(); i++) {
            InsertNode(keys[i], valuePointers[i]);
        }
    }

    // Recursively frees `node`, its child map and all descendants.
    // Accepts NULL.
    void DeleteNode(TrieNode* node) {
        if(NULL == node) {
            return;
        }
        if(NULL != node->next) {
            for(TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
                DeleteNode(it->second);
            }
            delete node->next;
        }
        delete node;
    }

    TrieNode* root_;  // owned; never NULL after construction
}; // class Trie
} // namespace cppjieba
#endif // CPPJIEBA_TRIE_HPP

View File

@ -1,23 +1,4 @@
/* #pragma once
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef CPPJIEBA_UNICODE_H
#define CPPJIEBA_UNICODE_H
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
@ -25,6 +6,7 @@
#include <vector> #include <vector>
#include <ostream> #include <ostream>
#include "limonp/LocalVector.hpp" #include "limonp/LocalVector.hpp"
#include "limonp/StringUtil.hpp"
namespace cppjieba { namespace cppjieba {
@ -33,6 +15,12 @@ using std::vector;
typedef uint32_t Rune; typedef uint32_t Rune;
// A keyword together with the places it was found and its score.
struct KeyWord {
// The keyword text.
string word;
// Positions at which the keyword was found.
// NOTE(review): presumably byte offsets into the source sentence - confirm against the extractors.
vector<size_t> offsets;
// Score assigned to the keyword by the extractor that produced it.
double weight;
}; // struct KeyWord
struct Word { struct Word {
string word; string word;
uint32_t offset; uint32_t offset;
@ -50,28 +38,28 @@ inline std::ostream& operator << (std::ostream& os, const Word& w) {
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}"; return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
} }
struct RuneStr { struct RuneInfo {
Rune rune; Rune rune;
uint32_t offset; uint32_t offset;
uint32_t len; uint32_t len;
uint32_t unicode_offset; uint32_t unicode_offset = 0;
uint32_t unicode_length; uint32_t unicode_length = 0;
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) { RuneInfo(): rune(0), offset(0), len(0) {
} }
RuneStr(Rune r, uint32_t o, uint32_t l) RuneInfo(Rune r, uint32_t o, uint32_t l)
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) { : rune(r), offset(o), len(l) {
} }
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length) RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) { : rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
} }
}; // struct RuneStr }; // struct RuneInfo
inline std::ostream& operator << (std::ostream& os, const RuneStr& r) { inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}"; return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
} }
typedef limonp::LocalVector<Rune> Unicode; typedef limonp::LocalVector<Rune> RuneArray;
typedef limonp::LocalVector<struct RuneStr> RuneStrArray; typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
// [left, right] // [left, right]
struct WordRange { struct WordRange {
@ -81,129 +69,157 @@ struct WordRange {
: left(l), right(r) { : left(l), right(r) {
} }
size_t Length() const { size_t Length() const {
return right - left + 1; return right - left;
} }
bool IsAllAscii() const { bool IsAllAscii() const {
for(RuneStrArray::const_iterator iter = left; iter <= right; ++iter) { for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
if(iter->rune >= 0x80) { if (iter->rune >= 0x80) {
return false; return false;
} }
} }
return true; return true;
} }
}; // struct WordRange }; // struct WordRange
// Result of decoding a single rune: the decoded value plus the number of
// input bytes it occupied. A `len` of 0 is used by the decoder to signal
// failure.
struct RuneStrLite {
    uint32_t rune;  // decoded code point
    uint32_t len;   // encoded length in bytes

    // Zero-initialised ("nothing decoded") value.
    RuneStrLite()
        : rune(0),
          len(0) {
    }

    // Value for a rune `codePoint` that occupied `byteCount` input bytes.
    RuneStrLite(uint32_t codePoint, uint32_t byteCount)
        : rune(codePoint),
          len(byteCount) {
    }
}; // struct RuneStrLite
inline RuneStrLite DecodeRuneInString(const char* str, size_t len) { inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
RuneStrLite rp(0, 0); arr.clear();
if(str == NULL || len == 0) { return limonp::Utf8ToUnicode32(s, arr);
return rp; }
}
if(!(str[0] & 0x80)) { // 0xxxxxxx inline RuneArray DecodeRunesInString(const string& s) {
RuneArray result;
DecodeRunesInString(s, result);
return result;
}
//重写DecodeRunesInString函数将实现放入函数中降低内存占用加快处理流程--jxx20210518
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
uint32_t tmp;
uint32_t offset = 0;
runes.clear();
uint32_t len(0);
for (size_t i = 0; i < s.size();) {
if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit // 7bit, total 7bit
rp.rune = (uint8_t)(str[0]) & 0x7f; tmp = (uint8_t)(s.data()[i]) & 0x7f;
rp.len = 1; i++;
} else if((uint8_t)str[0] <= 0xdf && 1 < len) { len = 1;
// 110xxxxxx } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
// 5bit, total 5bit // 5bit, total 5bit
rp.rune = (uint8_t)(str[0]) & 0x1f; tmp = (uint8_t)(s.data()[i]) & 0x1f;
// 6bit, total 11bit // 6bit, total 11bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f; tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
rp.len = 2; i += 2;
} else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx len = 2;
} else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
// 4bit, total 4bit // 4bit, total 4bit
rp.rune = (uint8_t)(str[0]) & 0x0f; tmp = (uint8_t)(s.data()[i]) & 0x0f;
// 6bit, total 10bit // 6bit, total 10bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f; tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
// 6bit, total 16bit // 6bit, total 16bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f; tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
rp.len = 3; i += 3;
} else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx len = 3;
} else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
// 3bit, total 3bit // 3bit, total 3bit
rp.rune = (uint8_t)(str[0]) & 0x07; tmp = (uint8_t)(s.data()[i]) & 0x07;
// 6bit, total 9bit // 6bit, total 9bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f; tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
// 6bit, total 15bit // 6bit, total 15bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f; tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
// 6bit, total 21bit // 6bit, total 21bit
rp.rune <<= 6; tmp <<= 6;
rp.rune |= (uint8_t)(str[3]) & 0x3f; tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
rp.len = 4; i += 4;
} else { len = 4;
rp.rune = 0; } else {
rp.len = 0;
}
return rp;
}
inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
runes.clear();
runes.reserve(len / 2);
for(uint32_t i = 0, j = 0; i < len;) {
RuneStrLite rp = DecodeRuneInString(s + i, len - i);
if(rp.len == 0) {
runes.clear();
return false;
}
RuneStr x(rp.rune, i, rp.len, j, 1);
runes.push_back(x);
i += rp.len;
++j;
}
return true;
}
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
return DecodeRunesInString(s.c_str(), s.size(), runes);
}
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
unicode.clear();
RuneStrArray runes;
if(!DecodeRunesInString(s, len, runes)) {
return false; return false;
} }
unicode.reserve(runes.size()); RuneInfo x(tmp, offset, len, i, 1);
for(size_t i = 0; i < runes.size(); i++) { runes.push_back(x);
unicode.push_back(runes[i].rune); offset += len;
} }
return true; return true;
} }
// Thin iterator adaptor over a contiguous run of RuneInfo records that yields
// only each record's `rune` when dereferenced. It lets algorithms written for
// iterators over uint32_t code points walk a RuneInfo array without copying
// the runes out first.
//
// The wrapper does not own the records; the underlying array must stay alive
// while the wrapper is in use.
class RunePtrWrapper {
public:
    // Current position; public so existing code that reads it keeps working.
    const RuneInfo * m_ptr = nullptr;
public:
    explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}

    // Code point stored at the current position.
    uint32_t operator *() const {
        return m_ptr->rune;
    }

    // Prefix increment: advance, then return this iterator.
    RunePtrWrapper& operator ++() {
        ++m_ptr;
        return *this;
    }

    // Postfix increment: advance, but return the position held BEFORE the
    // advance, as the postfix contract requires (so `*it++` reads the current
    // element rather than skipping it).
    RunePtrWrapper operator ++(int) {
        RunePtrWrapper previous(m_ptr);
        ++m_ptr;
        return previous;
    }

    // Two wrappers differ when they point at different records.
    bool operator !=(const RunePtrWrapper & b) const {
        return this->m_ptr != b.m_ptr;
    }
};
// Encodes the runes of [begin, end) with limonp::Unicode32ToUtf8 and returns
// the resulting string by value.
inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
    // Adapt the RuneInfo range so the encoder sees plain code points.
    RunePtrWrapper first(begin);
    RunePtrWrapper last(end);

    string encoded;
    limonp::Unicode32ToUtf8(first, last, encoded);
    return encoded;
}
// Encodes the runes of [begin, end) with limonp::Unicode32ToUtf8, writing the
// result into the caller-supplied `str` instead of returning a new string.
inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
    // Adapt the RuneInfo range so the encoder sees plain code points.
    RunePtrWrapper first(begin);
    RunePtrWrapper last(end);
    limonp::Unicode32ToUtf8(first, last, str);
}
// Container look-alike that only counts: it exposes the `clear` / `push_back`
// pair a decoder expects from its output container, but discards the values
// and just tracks how many were pushed.
class Unicode32Counter {
public:
    // Number of push_back calls since construction or the last clear().
    size_t length = 0;

    // Records one more element; the value itself is ignored.
    void push_back(uint32_t /*rune*/) {
        length += 1;
    }

    // Resets the count to zero.
    void clear() {
        length = 0;
    }
};
inline size_t Utf8CharNum(const char * str, size_t length) {
Unicode32Counter c;
if (limonp::Utf8ToUnicode32(str, length, c)) {
return c.length;
}
return 0;
}
inline size_t Utf8CharNum(const string & str) {
return Utf8CharNum(str.data(), str.size());
}
inline bool IsSingleWord(const string& str) { inline bool IsSingleWord(const string& str) {
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size()); return Utf8CharNum(str) == 1;
return rp.len == str.size();
}
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
return DecodeRunesInString(s.c_str(), s.size(), unicode);
}
inline Unicode DecodeRunesInString(const string& s) {
Unicode result;
DecodeRunesInString(s, result);
return result;
} }
@ -217,29 +233,31 @@ inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left,
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) { inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset); assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len; //uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, len); return s.substr(left->offset, right->offset - left->offset + right->len);
} }
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) { inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
for(size_t i = 0; i < wrs.size(); i++) { for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right)); words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
} }
} }
inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) { inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
vector<Word> result; for (size_t i = 0; i < wrs.size(); i++) {
GetWordsFromWordRanges(s, wrs, result); words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
return result; }
} }
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) { inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
strs.resize(words.size()); strs.resize(words.size());
for(size_t i = 0; i < words.size(); ++i) {
for (size_t i = 0; i < words.size(); ++i) {
strs[i] = words[i].word; strs[i] = words[i].word;
} }
} }
const size_t MAX_WORD_LENGTH = 512;
} // namespace cppjieba } // namespace cppjieba
#endif // CPPJIEBA_UNICODE_H

View File

@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD
HEADERS += \ HEADERS += \
$$PWD/DictTrie.hpp \ $$PWD/DictTrie.hpp \
$$PWD/IdfTrie.hpp \
$$PWD/FullSegment.hpp \ $$PWD/FullSegment.hpp \
$$PWD/HMMModel.hpp \ $$PWD/HMMModel.hpp \
$$PWD/HMMSegment.hpp \ $$PWD/HMMSegment.hpp \
@ -17,5 +18,4 @@ HEADERS += \
$$PWD/TextRankExtractor.hpp \ $$PWD/TextRankExtractor.hpp \
$$PWD/Trie.hpp \ $$PWD/Trie.hpp \
$$PWD/Unicode.hpp $$PWD/Unicode.hpp
include(limonp/limonp.pri) include(limonp/limonp.pri)

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
/************************************ /************************************
* file enc : ascii * file enc : ascii
* author : wuyanyi09@gmail.com * author : wuyanyi09@gmail.com
@ -33,54 +15,54 @@ namespace limonp {
using namespace std; using namespace std;
class ArgvContext { class ArgvContext {
public : public :
ArgvContext(int argc, const char* const * argv) { ArgvContext(int argc, const char* const * argv) {
for(int i = 0; i < argc; i++) { for(int i = 0; i < argc; i++) {
if(StartsWith(argv[i], "-")) { if(StartsWith(argv[i], "-")) {
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) { if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
mpss_[argv[i]] = argv[i + 1]; mpss_[argv[i]] = argv[i+1];
i++; i++;
} else { } else {
sset_.insert(argv[i]); sset_.insert(argv[i]);
}
} else {
args_.push_back(argv[i]);
}
} }
} else {
args_.push_back(argv[i]);
}
} }
~ArgvContext() { }
} ~ArgvContext() {
}
friend ostream& operator << (ostream& os, const ArgvContext& args); friend ostream& operator << (ostream& os, const ArgvContext& args);
string operator [](size_t i) const { string operator [](size_t i) const {
if(i < args_.size()) { if(i < args_.size()) {
return args_[i]; return args_[i];
}
return "";
} }
string operator [](const string& key) const { return "";
map<string, string>::const_iterator it = mpss_.find(key); }
if(it != mpss_.end()) { string operator [](const string& key) const {
return it->second; map<string, string>::const_iterator it = mpss_.find(key);
} if(it != mpss_.end()) {
return ""; return it->second;
} }
return "";
}
bool HasKey(const string& key) const { bool HasKey(const string& key) const {
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) { if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
return true; return true;
}
return false;
} }
return false;
}
private: private:
vector<string> args_; vector<string> args_;
map<string, string> mpss_; map<string, string> mpss_;
set<string> sset_; set<string> sset_;
}; // class ArgvContext }; // class ArgvContext
inline ostream& operator << (ostream& os, const ArgvContext& args) { inline ostream& operator << (ostream& os, const ArgvContext& args) {
return os << args.args_ << args.mpss_ << args.sset_; return os<<args.args_<<args.mpss_<<args.sset_;
} }
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_BLOCKINGQUEUE_HPP #ifndef LIMONP_BLOCKINGQUEUE_HPP
#define LIMONP_BLOCKINGQUEUE_HPP #define LIMONP_BLOCKINGQUEUE_HPP
@ -25,41 +7,41 @@
namespace limonp { namespace limonp {
template<class T> template<class T>
class BlockingQueue: NonCopyable { class BlockingQueue: NonCopyable {
public: public:
BlockingQueue() BlockingQueue()
: mutex_(), notEmpty_(mutex_), queue_() { : mutex_(), notEmpty_(mutex_), queue_() {
} }
void Push(const T& x) { void Push(const T& x) {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
queue_.push(x); queue_.push(x);
notEmpty_.Notify(); // Wait morphing saves us notEmpty_.Notify(); // Wait morphing saves us
} }
T Pop() { T Pop() {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
// always use a while-loop, due to spurious wakeup // always use a while-loop, due to spurious wakeup
while(queue_.empty()) { while (queue_.empty()) {
notEmpty_.Wait(); notEmpty_.Wait();
}
assert(!queue_.empty());
T front(queue_.front());
queue_.pop();
return front;
} }
assert(!queue_.empty());
T front(queue_.front());
queue_.pop();
return front;
}
size_t Size() const { size_t Size() const {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
return queue_.size(); return queue_.size();
} }
bool Empty() const { bool Empty() const {
return Size() == 0; return Size() == 0;
} }
private: private:
mutable MutexLock mutex_; mutable MutexLock mutex_;
Condition notEmpty_; Condition notEmpty_;
std::queue<T> queue_; std::queue<T> queue_;
}; // class BlockingQueue }; // class BlockingQueue
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP #ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP #define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
@ -25,59 +7,59 @@ namespace limonp {
template<typename T> template<typename T>
class BoundedBlockingQueue : NonCopyable { class BoundedBlockingQueue : NonCopyable {
public: public:
explicit BoundedBlockingQueue(size_t maxSize) explicit BoundedBlockingQueue(size_t maxSize)
: mutex_(), : mutex_(),
notEmpty_(mutex_), notEmpty_(mutex_),
notFull_(mutex_), notFull_(mutex_),
queue_(maxSize) { queue_(maxSize) {
} }
void Push(const T& x) { void Push(const T& x) {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
while(queue_.Full()) { while (queue_.Full()) {
notFull_.Wait(); notFull_.Wait();
}
assert(!queue_.Full());
queue_.Push(x);
notEmpty_.Notify();
} }
assert(!queue_.Full());
queue_.Push(x);
notEmpty_.Notify();
}
T Pop() { T Pop() {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
while(queue_.Empty()) { while (queue_.Empty()) {
notEmpty_.Wait(); notEmpty_.Wait();
}
assert(!queue_.Empty());
T res = queue_.Pop();
notFull_.Notify();
return res;
} }
assert(!queue_.Empty());
T res = queue_.Pop();
notFull_.Notify();
return res;
}
bool Empty() const { bool Empty() const {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
return queue_.Empty(); return queue_.Empty();
} }
bool Full() const { bool Full() const {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
return queue_.Full(); return queue_.Full();
} }
size_t size() const { size_t size() const {
MutexLockGuard lock(mutex_); MutexLockGuard lock(mutex_);
return queue_.size(); return queue_.size();
} }
size_t capacity() const { size_t capacity() const {
return queue_.capacity(); return queue_.capacity();
} }
private: private:
mutable MutexLock mutex_; mutable MutexLock mutex_;
Condition notEmpty_; Condition notEmpty_;
Condition notFull_; Condition notFull_;
BoundedQueue<T> queue_; BoundedQueue<T> queue_;
}; // class BoundedBlockingQueue }; // class BoundedBlockingQueue
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_BOUNDED_QUEUE_HPP #ifndef LIMONP_BOUNDED_QUEUE_HPP
#define LIMONP_BOUNDED_QUEUE_HPP #define LIMONP_BOUNDED_QUEUE_HPP
@ -27,55 +9,55 @@ namespace limonp {
using namespace std; using namespace std;
/**
 * Fixed-capacity FIFO queue backed by a circular buffer.
 *
 * The buffer is allocated once with `capacity` default-constructed slots.
 * Push() and Pop() assert on overflow/underflow instead of blocking or
 * growing, so callers are expected to check Full()/Empty() first.
 * The class performs no locking of its own.
 */
template<class T>
class BoundedQueue {
 public:
  /// Creates an empty queue holding at most `capacity` items (must be non-zero).
  explicit BoundedQueue(size_t capacity)
    : head_(0),
      tail_(0),
      size_(0),
      capacity_(capacity),
      circular_buffer_(capacity) {
    // A zero capacity would make the modulo arithmetic below divide by zero.
    assert(capacity_);
  }
  ~BoundedQueue() {
  }

  /// Forgets all queued items. Slots are not destroyed, only the indices reset.
  void Clear() {
    head_ = 0;
    tail_ = 0;
    size_ = 0;
  }
  bool Empty() const {
    return !size_;
  }
  bool Full() const {
    return capacity_ == size_;
  }
  size_t Size() const {
    return size_;
  }
  size_t Capacity() const {
    return capacity_;
  }

  /// Appends a copy of `t` at the tail. The queue must not be full.
  void Push(const T& t) {
    assert(!Full());
    circular_buffer_[tail_] = t;
    tail_ = (tail_ + 1) % capacity_;
    size_ ++;
  }

  /// Removes and returns (by copy) the oldest item. The queue must not be empty.
  T Pop() {
    assert(!Empty());
    size_t oldPos = head_;
    head_ = (head_ + 1) % capacity_;
    size_ --;
    return circular_buffer_[oldPos];
  }

 private:
  size_t head_;        // index of the oldest item
  size_t tail_;        // index of the next free slot
  size_t size_;        // number of items currently queued
  const size_t capacity_;
  std::vector<T> circular_buffer_;

}; // class BoundedQueue
} // namespace limonp } // namespace limonp

View File

@ -1,222 +1,204 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_CLOSURE_HPP #ifndef LIMONP_CLOSURE_HPP
#define LIMONP_CLOSURE_HPP #define LIMONP_CLOSURE_HPP
namespace limonp {

/**
 * Type-erased unit of work: a callable bundled with its bound arguments.
 * Run() invokes it; the virtual destructor allows deletion through the
 * interface pointer.
 */
class ClosureInterface {
 public:
  virtual ~ClosureInterface() {
  }
  virtual void Run() = 0;
};

// ---------------------------------------------------------------------------
// Free-function closures. `Funct` is a function pointer type; the bound
// arguments are stored by value and passed on every Run().
// Members are set in the initializer list so that neither `Funct` nor the
// argument types need to be default-constructible or assignable.
// ---------------------------------------------------------------------------

template <class Funct>
class Closure0: public ClosureInterface {
 public:
  Closure0(Funct fun)
    : fun_(fun) {
  }
  virtual ~Closure0() {
  }
  virtual void Run() {
    (*fun_)();
  }
 private:
  Funct fun_;
};

template <class Funct, class Arg1>
class Closure1: public ClosureInterface {
 public:
  Closure1(Funct fun, Arg1 arg1)
    : fun_(fun), arg1_(arg1) {
  }
  virtual ~Closure1() {
  }
  virtual void Run() {
    (*fun_)(arg1_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
};

template <class Funct, class Arg1, class Arg2>
class Closure2: public ClosureInterface {
 public:
  Closure2(Funct fun, Arg1 arg1, Arg2 arg2)
    : fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~Closure2() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
};

template <class Funct, class Arg1, class Arg2, class Arg3>
class Closure3: public ClosureInterface {
 public:
  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~Closure3() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
};

// ---------------------------------------------------------------------------
// Member-function closures. The object is held as a raw, non-owning pointer:
// it is never deleted here and must still be alive when Run() is called.
// ---------------------------------------------------------------------------

template <class Obj, class Funct>
class ObjClosure0: public ClosureInterface {
 public:
  ObjClosure0(Obj* p, Funct fun)
    : p_(p), fun_(fun) {
  }
  virtual ~ObjClosure0() {
  }
  virtual void Run() {
    (p_->*fun_)();
  }
 private:
  Obj* p_;
  Funct fun_;
};

template <class Obj, class Funct, class Arg1>
class ObjClosure1: public ClosureInterface {
 public:
  ObjClosure1(Obj* p, Funct fun, Arg1 arg1)
    : p_(p), fun_(fun), arg1_(arg1) {
  }
  virtual ~ObjClosure1() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
};

template <class Obj, class Funct, class Arg1, class Arg2>
class ObjClosure2: public ClosureInterface {
 public:
  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~ObjClosure2() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
};

template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
class ObjClosure3: public ClosureInterface {
 public:
  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~ObjClosure3() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
};

// ---------------------------------------------------------------------------
// Factory overloads. Each returns a closure allocated with `new`; nothing in
// this header deletes it, so the caller (or whoever runs it) must.
// ---------------------------------------------------------------------------

template<class R>
ClosureInterface* NewClosure(R (*fun)()) {
  return new Closure0<R (*)()>(fun);
}

template<class R, class Arg1>
ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
  return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
}

template<class R, class Arg1, class Arg2>
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
}

template<class R, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
}

template<class R, class Obj>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
  return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
}

template<class R, class Obj, class Arg1>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
  return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
}

template<class R, class Obj, class Arg1, class Arg2>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
}

template<class R, class Obj, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
}

} // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_COLOR_PRINT_HPP #ifndef LIMONP_COLOR_PRINT_HPP
#define LIMONP_COLOR_PRINT_HPP #define LIMONP_COLOR_PRINT_HPP
@ -27,21 +9,21 @@ namespace limonp {
using std::string; using std::string;
// Colour codes for ColorPrintln. The numeric value is inserted verbatim into
// the "\033[0;<n>m" escape sequence, so the enumerators run 30..35.
enum Color {
  BLACK = 30,
  RED,
  GREEN,
  YELLOW,
  BLUE,
  PURPLE
}; // enum Color
/**
 * Prints a printf-style message to stdout in the given colour.
 *
 * Emits the escape sequence "\033[0;<color>m", then the formatted text, then
 * the reset sequence followed by a newline.
 *
 * @param color  value written into the escape sequence (see enum Color)
 * @param fmt    printf format string; the variadic arguments must match it
 */
static void ColorPrintln(enum Color color, const char * fmt, ...) {
  va_list ap;
  printf("\033[0;%dm", color);
  va_start(ap, fmt);
  vprintf(fmt, ap);
  va_end(ap);
  // The newline is written together with the reset on purpose: without it the
  // following lines can, in some situations, keep the same colour.
  printf("\033[0m\n");
}
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_CONDITION_HPP #ifndef LIMONP_CONDITION_HPP
#define LIMONP_CONDITION_HPP #define LIMONP_CONDITION_HPP
@ -24,31 +6,31 @@
namespace limonp { namespace limonp {
class Condition : NonCopyable { class Condition : NonCopyable {
public: public:
explicit Condition(MutexLock& mutex) explicit Condition(MutexLock& mutex)
: mutex_(mutex) { : mutex_(mutex) {
XCHECK(!pthread_cond_init(&pcond_, NULL)); XCHECK(!pthread_cond_init(&pcond_, NULL));
} }
~Condition() { ~Condition() {
XCHECK(!pthread_cond_destroy(&pcond_)); XCHECK(!pthread_cond_destroy(&pcond_));
} }
void Wait() { void Wait() {
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex())); XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
} }
void Notify() { void Notify() {
XCHECK(!pthread_cond_signal(&pcond_)); XCHECK(!pthread_cond_signal(&pcond_));
} }
void NotifyAll() { void NotifyAll() {
XCHECK(!pthread_cond_broadcast(&pcond_)); XCHECK(!pthread_cond_broadcast(&pcond_));
} }
private: private:
MutexLock& mutex_; MutexLock& mutex_;
pthread_cond_t pcond_; pthread_cond_t pcond_;
}; // class Condition }; // class Condition
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
/************************************ /************************************
* file enc : utf8 * file enc : utf8
* author : wuyanyi09@gmail.com * author : wuyanyi09@gmail.com
@ -34,86 +16,86 @@ namespace limonp {
using namespace std; using namespace std;
class Config { class Config {
public: public:
explicit Config(const string& filePath) { explicit Config(const string& filePath) {
LoadFile(filePath); LoadFile(filePath);
} }
operator bool () { operator bool () {
return !map_.empty(); return !map_.empty();
} }
string Get(const string& key, const string& defaultvalue) const { string Get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key); map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) { if(map_.end() != it) {
return it->second; return it->second;
}
return defaultvalue;
} }
int Get(const string& key, int defaultvalue) const { return defaultvalue;
string str = Get(key, ""); }
if("" == str) { int Get(const string& key, int defaultvalue) const {
return defaultvalue; string str = Get(key, "");
} if("" == str) {
return atoi(str.c_str()); return defaultvalue;
} }
const char* operator [](const char* key) const { return atoi(str.c_str());
if(NULL == key) { }
return NULL; const char* operator [] (const char* key) const {
} if(NULL == key) {
map<string, string>::const_iterator it = map_.find(key); return NULL;
if(map_.end() != it) {
return it->second.c_str();
}
return NULL;
} }
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second.c_str();
}
return NULL;
}
string GetConfigInfo() const { string GetConfigInfo() const {
string res; string res;
res << *this; res << *this;
return res; return res;
}
private:
void LoadFile(const string& filePath) {
ifstream ifs(filePath.c_str());
assert(ifs);
string line;
vector<string> vecBuf;
size_t lineno = 0;
while(getline(ifs, line)) {
lineno ++;
Trim(line);
if(line.empty() || StartsWith(line, "#")) {
continue;
}
vecBuf.clear();
Split(line, vecBuf, "=");
if(2 != vecBuf.size()) {
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
assert(false);
continue;
}
string& key = vecBuf[0];
string& value = vecBuf[1];
Trim(key);
Trim(value);
if(!map_.insert(make_pair(key, value)).second) {
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
assert(false);
continue;
}
} }
ifs.close();
}
private: friend ostream& operator << (ostream& os, const Config& config);
void LoadFile(const string& filePath) {
ifstream ifs(filePath.c_str());
assert(ifs);
string line;
vector<string> vecBuf;
size_t lineno = 0;
while(getline(ifs, line)) {
lineno ++;
Trim(line);
if(line.empty() || StartsWith(line, "#")) {
continue;
}
vecBuf.clear();
Split(line, vecBuf, "=");
if(2 != vecBuf.size()) {
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
assert(false);
continue;
}
string& key = vecBuf[0];
string& value = vecBuf[1];
Trim(key);
Trim(value);
if(!map_.insert(make_pair(key, value)).second) {
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
assert(false);
continue;
}
}
ifs.close();
}
friend ostream& operator << (ostream& os, const Config& config); map<string, string> map_;
map<string, string> map_;
}; // class Config }; // class Config
/// Streams the configuration by writing its key/value map to `os`
/// (uses the map `<<` overload provided elsewhere in limonp).
inline ostream& operator << (ostream& os, const Config& config) {
  return os << config.map_;
}
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_FILELOCK_HPP #ifndef LIMONP_FILELOCK_HPP
#define LIMONP_FILELOCK_HPP #define LIMONP_FILELOCK_HPP
@ -33,58 +15,58 @@ namespace limonp {
using std::string; using std::string;
/**
 * Advisory whole-file write lock built on fcntl(F_SETLK).
 *
 * Usage: Open() a path, then Lock()/UnLock(). Failures do not throw; they
 * clear Ok() and store the strerror() text retrievable through Error().
 * The descriptor is closed by Close() or by the destructor.
 *
 * NOTE(review): the class is copyable but a copy would share the same
 * descriptor; avoid copying instances.
 */
class FileLock {
 public:
  FileLock() : fd_(-1), ok_(true) {
  }

  ~FileLock() {
    // Close() ignores an unopened (-1) descriptor, and 0 is a valid one.
    Close();
  }

  /// Opens (creating with mode 0644 if needed) the lock file.
  /// Must not be called while another file is still open.
  void Open(const std::string& fname) {
    assert(fd_ == -1);
    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd_ < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /// Closes the descriptor if one is open and marks the object as unopened,
  /// so repeated Close() calls are harmless and Open() may be called again.
  void Close() {
    if(fd_ >= 0) {
      ::close(fd_);
      fd_ = -1;
    }
  }

  /// Tries to take the write lock. Uses F_SETLK, so it fails immediately
  /// (Ok() becomes false) instead of waiting when the lock cannot be taken.
  void Lock() {
    if(LockOrUnlock(fd_, true) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /// Releases the lock taken by Lock().
  void UnLock() {
    if(LockOrUnlock(fd_, false) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /// False once any operation has failed; never reset back to true.
  bool Ok() const {
    return ok_;
  }

  /// Text of the most recent failure (empty if none happened).
  std::string Error() const {
    return err_;
  }

 private:
  /// Sets (F_WRLCK) or clears (F_UNLCK) a lock covering the entire file.
  /// Returns the fcntl() result, i.e. -1 with errno set on failure.
  static int LockOrUnlock(int fd, bool lock) {
    errno = 0;
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (lock ? F_WRLCK : F_UNLCK);
    f.l_whence = SEEK_SET;
    f.l_start = 0;
    f.l_len = 0;        // Lock/unlock entire file
    return fcntl(fd, F_SETLK, &f);
  }

  int fd_;
  bool ok_;
  std::string err_;
}; // class FileLock
}// namespace limonp }// namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_FORCE_PUBLIC_H #ifndef LIMONP_FORCE_PUBLIC_H
#define LIMONP_FORCE_PUBLIC_H #define LIMONP_FORCE_PUBLIC_H

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_LOCAL_VECTOR_HPP #ifndef LIMONP_LOCAL_VECTOR_HPP
#define LIMONP_LOCAL_VECTOR_HPP #define LIMONP_LOCAL_VECTOR_HPP
@ -33,123 +15,126 @@ using namespace std;
// Number of elements that fit in LocalVector's inline buffer before it
// falls back to heap storage.
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;

/**
 * Small-buffer vector: the first LOCAL_VECTOR_BUFFER_SIZE elements live in
 * an inline array, larger contents are moved to malloc'ed storage.
 *
 * Elements are relocated and copied with memcpy() and heap storage is never
 * constructed or destroyed element-wise, so T must be a type for which a
 * raw byte copy is a valid copy.
 */
template <class T>
class LocalVector {
 public:
  typedef const T* const_iterator ;
  typedef T value_type;
  typedef size_t size_type;
 private:
  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
  T * ptr_;           // buffer_ or a malloc'ed block
  size_t size_;
  size_t capacity_;
 public:
  LocalVector() {
    init_();
  }
  LocalVector(const LocalVector<T>& vec) {
    init_();
    *this = vec;
  }
  LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
    init_();
    while(begin != end) {
      push_back(*begin++);
    }
  }
  /// Creates a vector holding `size` copies of `t`.
  LocalVector(size_t size, const T& t) { // TODO: make it faster
    init_();
    while(size--) {
      push_back(t);
    }
  }
  ~LocalVector() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
  }
 public:
  LocalVector<T>& operator = (const LocalVector<T>& vec) {
    // clear() below would release the very storage we are about to copy from.
    if(this == &vec){
      return *this;
    }
    clear();
    size_ = vec.size();
    capacity_ = vec.capacity();
    if(vec.buffer_ == vec.ptr_) {
      memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
      ptr_ = buffer_;
    } else {
      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
      assert(ptr_);
      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
    }
    return *this;
  }
 private:
  /// Resets to the empty state backed by the inline buffer (frees nothing).
  void init_() {
    ptr_ = buffer_;
    size_ = 0;
    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
  }
 public:
  /// Unchecked element access.
  T& operator [] (size_t i) {
    return ptr_[i];
  }
  const T& operator [] (size_t i) const {
    return ptr_[i];
  }
  /// Appends a copy of `t`, doubling the capacity when full.
  void push_back(const T& t) {
    if(size_ == capacity_) {
      assert(capacity_);
      // `t` may refer to an element of this vector (v.push_back(v[0])).
      // reserve() frees the old heap block, so copy the value first.
      const T value = t;
      reserve(capacity_ * 2);
      ptr_[size_ ++ ] = value;
      return;
    }
    ptr_[size_ ++ ] = t;
  }
  /// Grows the storage to hold at least `size` elements; never shrinks.
  void reserve(size_t size) {
    if(size <= capacity_) {
      return;
    }
    T * next =  (T*)malloc(sizeof(T) * size);
    assert(next);
    T * old = ptr_;
    ptr_ = next;
    // Only the first size_ slots hold values; the rest were never written.
    memcpy(ptr_, old, sizeof(T) * size_);
    capacity_ = size;
    if(old != buffer_) {
      free(old);
    }
  }
  bool empty() const {
    return 0 == size();
  }
  size_t size() const {
    return size_;
  }
  size_t capacity() const {
    return capacity_;
  }
  const_iterator begin() const {
    return ptr_;
  }
  const_iterator end() const {
    return ptr_ + size_;
  }
  /// Releases heap storage (if any) and returns to the empty inline state.
  void clear() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
    init_();
  }
};
template <class T> template <class T>
ostream & operator << (ostream& os, const LocalVector<T>& vec) { ostream & operator << (ostream& os, const LocalVector<T>& vec) {
if(vec.empty()) { if(vec.empty()) {
return os << "[]"; return os << "[]";
} }
os << "[\"" << vec[0]; os<<"[\""<<vec[0];
for(size_t i = 1; i < vec.size(); i++) { for(size_t i = 1; i < vec.size(); i++) {
os << "\", \"" << vec[i]; os<<"\", \""<<vec[i];
} }
os << "\"]"; os<<"\"]";
return os; return os;
} }
} }

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_LOGGING_HPP #ifndef LIMONP_LOGGING_HPP
#define LIMONP_LOGGING_HPP #define LIMONP_LOGGING_HPP
@ -38,55 +20,56 @@
namespace limonp {

// Log severities, ordered from least to most severe. The values index
// LOG_LEVEL_ARRAY below, so both must be kept in sync.
enum {
  LL_DEBUG = 0,
  LL_INFO = 1,
  LL_WARNING = 2,
  LL_ERROR = 3,
  LL_FATAL = 4,
}; // enum

static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};

/**
 * One log record. The constructor writes the prefix
 * "<timestamp> <file>:<line> <LEVEL> " into an internal stream, callers
 * append the message through Stream(), and the destructor prints the whole
 * line to std::cerr. A record at LL_FATAL calls abort() after printing.
 *
 * When LOGGING_LEVEL is defined, records below that level are neither
 * prefixed nor printed.
 */
class Logger {
 public:
  Logger(size_t level, const char* filename, int lineno)
   : level_(level) {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    // level_ indexes LOG_LEVEL_ARRAY, so it must be strictly below its size.
    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));

    char buf[32];

    time_t now;
    time(&now);
    // localtime_r writes into a caller-supplied struct rather than the
    // shared static buffer used by localtime().
    struct tm result;
    localtime_r(&now, &result);
    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);

    stream_ << buf
      << " " << filename
      << ":" << lineno
      << " " << LOG_LEVEL_ARRAY[level_]
      << " ";
  }
  ~Logger() {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    std::cerr << stream_.str() << std::endl;
    if (level_ == LL_FATAL) {
      abort();
    }
  }

  /// Stream that receives the message text of this record.
  std::ostream& Stream() {
    return stream_;
  }

 private:
  std::ostringstream stream_;
  size_t level_;
}; // class Logger

} // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_MUTEX_LOCK_HPP #ifndef LIMONP_MUTEX_LOCK_HPP
#define LIMONP_MUTEX_LOCK_HPP #define LIMONP_MUTEX_LOCK_HPP
@ -26,40 +8,40 @@
namespace limonp { namespace limonp {
class MutexLock: NonCopyable { class MutexLock: NonCopyable {
public: public:
MutexLock() { MutexLock() {
XCHECK(!pthread_mutex_init(&mutex_, NULL)); XCHECK(!pthread_mutex_init(&mutex_, NULL));
} }
~MutexLock() { ~MutexLock() {
XCHECK(!pthread_mutex_destroy(&mutex_)); XCHECK(!pthread_mutex_destroy(&mutex_));
} }
pthread_mutex_t* GetPthreadMutex() { pthread_mutex_t* GetPthreadMutex() {
return &mutex_; return &mutex_;
} }
private: private:
void Lock() { void Lock() {
XCHECK(!pthread_mutex_lock(&mutex_)); XCHECK(!pthread_mutex_lock(&mutex_));
} }
void Unlock() { void Unlock() {
XCHECK(!pthread_mutex_unlock(&mutex_)); XCHECK(!pthread_mutex_unlock(&mutex_));
} }
friend class MutexLockGuard; friend class MutexLockGuard;
pthread_mutex_t mutex_; pthread_mutex_t mutex_;
}; // class MutexLock }; // class MutexLock
class MutexLockGuard: NonCopyable { class MutexLockGuard: NonCopyable {
public: public:
explicit MutexLockGuard(MutexLock & mutex) explicit MutexLockGuard(MutexLock & mutex)
: mutex_(mutex) { : mutex_(mutex) {
mutex_.Lock(); mutex_.Lock();
} }
~MutexLockGuard() { ~MutexLockGuard() {
mutex_.Unlock(); mutex_.Unlock();
} }
private: private:
MutexLock & mutex_; MutexLock & mutex_;
}; // class MutexLockGuard }; // class MutexLockGuard
#define MutexLockGuard(x) XCHECK(false); #define MutexLockGuard(x) XCHECK(false);

View File

@ -1,35 +1,19 @@
/* /************************************
* Copyright (C) 2020, KylinSoft Co., Ltd. ************************************/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_NONCOPYABLE_H #ifndef LIMONP_NONCOPYABLE_H
#define LIMONP_NONCOPYABLE_H #define LIMONP_NONCOPYABLE_H
namespace limonp { namespace limonp {
class NonCopyable { class NonCopyable {
protected: protected:
NonCopyable() { NonCopyable() {
} }
~NonCopyable() { ~NonCopyable() {
} }
private: private:
NonCopyable(const NonCopyable&); NonCopyable(const NonCopyable& );
const NonCopyable& operator=(const NonCopyable&); const NonCopyable& operator=(const NonCopyable& );
}; // class NonCopyable }; // class NonCopyable
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_STD_EXTEMSION_HPP #ifndef LIMONP_STD_EXTEMSION_HPP
#define LIMONP_STD_EXTEMSION_HPP #define LIMONP_STD_EXTEMSION_HPP
@ -51,123 +33,123 @@ namespace std {
template<typename T> template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) { ostream& operator << (ostream& os, const vector<T>& v) {
if(v.empty()) { if(v.empty()) {
return os << "[]"; return os << "[]";
} }
os << "[" << v[0]; os<<"["<<v[0];
for(size_t i = 1; i < v.size(); i++) { for(size_t i = 1; i < v.size(); i++) {
os << ", " << v[i]; os<<", "<<v[i];
} }
os << "]"; os<<"]";
return os; return os;
} }
template<> template<>
inline ostream& operator << (ostream& os, const vector<string>& v) { inline ostream& operator << (ostream& os, const vector<string>& v) {
if(v.empty()) { if(v.empty()) {
return os << "[]"; return os << "[]";
} }
os << "[\"" << v[0]; os<<"[\""<<v[0];
for(size_t i = 1; i < v.size(); i++) { for(size_t i = 1; i < v.size(); i++) {
os << "\", \"" << v[i]; os<<"\", \""<<v[i];
} }
os << "\"]"; os<<"\"]";
return os; return os;
} }
template<typename T> template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) { ostream& operator << (ostream& os, const deque<T>& dq) {
if(dq.empty()) { if(dq.empty()) {
return os << "[]"; return os << "[]";
} }
os << "[\"" << dq[0]; os<<"[\""<<dq[0];
for(size_t i = 1; i < dq.size(); i++) { for(size_t i = 1; i < dq.size(); i++) {
os << "\", \"" << dq[i]; os<<"\", \""<<dq[i];
} }
os << "\"]"; os<<"\"]";
return os; return os;
} }
template<class T1, class T2> template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) { ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
os << pr.first << ":" << pr.second ; os << pr.first << ":" << pr.second ;
return os; return os;
} }
template<class T> template<class T>
string& operator << (string& str, const T& obj) { string& operator << (string& str, const T& obj) {
stringstream ss; stringstream ss;
ss << obj; // call ostream& operator << (ostream& os, ss << obj; // call ostream& operator << (ostream& os,
return str = ss.str(); return str = ss.str();
} }
template<class T1, class T2> template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) { ostream& operator << (ostream& os, const map<T1, T2>& mp) {
if(mp.empty()) { if(mp.empty()) {
os << "{}"; os<<"{}";
return os;
}
os << '{';
typename map<T1, T2>::const_iterator it = mp.begin();
os << *it;
it++;
while(it != mp.end()) {
os << ", " << *it;
it++;
}
os << '}';
return os; return os;
}
os<<'{';
typename map<T1, T2>::const_iterator it = mp.begin();
os<<*it;
it++;
while(it != mp.end()) {
os<<", "<<*it;
it++;
}
os<<'}';
return os;
} }
template<class T1, class T2> template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) { ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
if(mp.empty()) { if(mp.empty()) {
return os << "{}"; return os << "{}";
} }
os << '{'; os<<'{';
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin(); typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
os << *it; os<<*it;
it++; it++;
while(it != mp.end()) { while(it != mp.end()) {
os << ", " << *it++; os<<", "<<*it++;
} }
return os << '}'; return os<<'}';
} }
template<class T> template<class T>
ostream& operator << (ostream& os, const set<T>& st) { ostream& operator << (ostream& os, const set<T>& st) {
if(st.empty()) { if(st.empty()) {
os << "{}"; os << "{}";
return os;
}
os << '{';
typename set<T>::const_iterator it = st.begin();
os << *it;
it++;
while(it != st.end()) {
os << ", " << *it;
it++;
}
os << '}';
return os; return os;
}
os<<'{';
typename set<T>::const_iterator it = st.begin();
os<<*it;
it++;
while(it != st.end()) {
os<<", "<<*it;
it++;
}
os<<'}';
return os;
} }
template<class KeyType, class ContainType> template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) { bool IsIn(const ContainType& contain, const KeyType& key) {
return contain.end() != contain.find(key); return contain.end() != contain.find(key);
} }
template<class T> template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) { basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>()); return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
} }
template<class T> template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) { ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
ostreambuf_iterator<T> itr(ofs); ostreambuf_iterator<T> itr (ofs);
copy(s.begin(), s.end(), itr); copy(s.begin(), s.end(), itr);
return ofs; return ofs;
} }
} // namespace std } // namespace std

View File

@ -1,27 +1,14 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
/************************************ /************************************
* file enc : ascii * file enc : ascii
* author : wuyanyi09@gmail.com * author : wuyanyi09@gmail.com
************************************/ ************************************/
#ifndef LIMONP_STR_FUNCTS_H #ifndef LIMONP_STR_FUNCTS_H
#define LIMONP_STR_FUNCTS_H #define LIMONP_STR_FUNCTS_H
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <memory.h>
#include <sys/types.h>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
@ -29,14 +16,9 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <map> #include <map>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <memory.h>
#include <functional> #include <functional>
#include <locale> #include <locale>
#include <sstream> #include <sstream>
#include <sys/types.h>
#include <iterator> #include <iterator>
#include <algorithm> #include <algorithm>
#include "StdExtension.hpp" #include "StdExtension.hpp"
@ -44,339 +26,356 @@
namespace limonp { namespace limonp {
using namespace std; using namespace std;
inline string StringFormat(const char* fmt, ...) { inline string StringFormat(const char* fmt, ...) {
int size = 256; int size = 256;
std::string str; std::string str;
va_list ap; va_list ap;
while(1) { while (1) {
str.resize(size); str.resize(size);
va_start(ap, fmt); va_start(ap, fmt);
int n = vsnprintf((char *)str.c_str(), size, fmt, ap); int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
va_end(ap); va_end(ap);
if(n > -1 && n < size) { if (n > -1 && n < size) {
str.resize(n); str.resize(n);
return str; return str;
}
if(n > -1)
size = n + 1;
else
size *= 2;
} }
return str; if (n > -1)
size = n + 1;
else
size *= 2;
}
return str;
} }
template<class T> template<class T>
void Join(T begin, T end, string& res, const string& connector) { void Join(T begin, T end, string& res, const string& connector) {
if(begin == end) { if(begin == end) {
return; return;
} }
stringstream ss; stringstream ss;
ss << *begin; ss<<*begin;
begin++; begin++;
while(begin != end) { while(begin != end) {
ss << connector << *begin; ss << connector << *begin;
begin ++; begin ++;
} }
res = ss.str(); res = ss.str();
} }
template<class T> template<class T>
string Join(T begin, T end, const string& connector) { string Join(T begin, T end, const string& connector) {
string res; string res;
Join(begin, end, res, connector); Join(begin ,end, res, connector);
return res; return res;
} }
inline string& Upper(string& str) { inline string& Upper(string& str) {
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
return str; return str;
} }
inline string& Lower(string& str) { inline string& Lower(string& str) {
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
return str; return str;
} }
inline bool IsSpace(unsigned c) { inline bool IsSpace(unsigned c) {
// when passing large int as the argument of isspace, it core dump, so here need a type cast. // when passing large int as the argument of isspace, it core dump, so here need a type cast.
return c > 0xff ? false : std::isspace(c & 0xff) != 0; return c > 0xff ? false : std::isspace(c & 0xff);
} }
inline std::string& LTrim(std::string &s) { inline std::string& LTrim(std::string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace)))); s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
return s; return s;
} }
inline std::string& RTrim(std::string &s) { inline std::string& RTrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end()); s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
return s; return s;
} }
inline std::string& Trim(std::string &s) { inline std::string& Trim(std::string &s) {
return LTrim(RTrim(s)); return LTrim(RTrim(s));
} }
inline std::string& LTrim(std::string & s, char x) { inline std::string& LTrim(std::string & s, char x) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x)))); s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
return s; return s;
} }
inline std::string& RTrim(std::string & s, char x) { inline std::string& RTrim(std::string & s, char x) {
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end()); s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
return s; return s;
} }
inline std::string& Trim(std::string &s, char x) { inline std::string& Trim(std::string &s, char x) {
return LTrim(RTrim(s, x), x); return LTrim(RTrim(s, x), x);
} }
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) { inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
res.clear(); res.clear();
size_t Start = 0; size_t Start = 0;
size_t end = 0; size_t end = 0;
string sub; string sub;
while(Start < src.size()) { while(Start < src.size()) {
end = src.find_first_of(pattern, Start); end = src.find_first_of(pattern, Start);
if(string::npos == end || res.size() >= maxsplit) { if(string::npos == end || res.size() >= maxsplit) {
sub = src.substr(Start); sub = src.substr(Start);
res.push_back(sub); res.push_back(sub);
return; return;
}
sub = src.substr(Start, end - Start);
res.push_back(sub);
Start = end + 1;
} }
return; sub = src.substr(Start, end - Start);
res.push_back(sub);
Start = end + 1;
}
return;
} }
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) { inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
vector<string> res; vector<string> res;
Split(src, res, pattern, maxsplit); Split(src, res, pattern, maxsplit);
return res; return res;
} }
inline bool StartsWith(const string& str, const string& prefix) { inline bool StartsWith(const string& str, const string& prefix) {
if(prefix.length() > str.length()) { if(prefix.length() > str.length()) {
return false; return false;
} }
return 0 == str.compare(0, prefix.length(), prefix); return 0 == str.compare(0, prefix.length(), prefix);
} }
inline bool EndsWith(const string& str, const string& suffix) { inline bool EndsWith(const string& str, const string& suffix) {
if(suffix.length() > str.length()) { if(suffix.length() > str.length()) {
return false; return false;
} }
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix); return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
} }
inline bool IsInStr(const string& str, char ch) { inline bool IsInStr(const string& str, char ch) {
return str.find(ch) != string::npos; return str.find(ch) != string::npos;
} }
inline uint16_t TwocharToUint16(char high, char low) { inline uint16_t TwocharToUint16(char high, char low) {
return (((uint16_t(high) & 0x00ff) << 8) | (uint16_t(low) & 0x00ff)); return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
} }
template <class Uint16Container> template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) { bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
if(!str) { if(!str) {
return false; return false;
}
char ch1, ch2;
uint16_t tmp;
vec.clear();
for(size_t i = 0; i < len;) {
if(!(str[i] & 0x80)) { // 0xxxxxxx
vec.push_back(str[i]);
i++;
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
ch1 = (str[i] >> 2) & 0x07;
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 3;
} else {
return false;
} }
char ch1, ch2; }
uint16_t tmp; return true;
vec.clear();
for(size_t i = 0; i < len;) {
if(!(str[i] & 0x80)) { // 0xxxxxxx
vec.push_back(str[i]);
i++;
} else if((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
ch1 = (str[i] >> 2) & 0x07;
ch2 = (str[i + 1] & 0x3f) | ((str[i] & 0x03) << 6);
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
ch1 = ((uint8_t)str[i] << 4) | ((str[i + 1] >> 2) & 0x0f);
ch2 = (((uint8_t)str[i + 1] << 6) & 0xc0) | (str[i + 2] & 0x3f);
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 3;
} else {
return false;
}
}
return true;
} }
template <class Uint16Container> template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) { bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
return Utf8ToUnicode(str.c_str(), str.size(), vec); return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
/*
 * Decode a UTF-8 byte buffer into 32-bit code points.
 *
 * str  : pointer to the first byte of the buffer (need not be NUL-terminated).
 * size : number of bytes to decode.
 * vec  : output container; it is cleared first, then receives one uint32_t
 *        per decoded sequence via push_back.
 *
 * Returns true when the whole buffer was consumed. Returns false when a
 * sequence is truncated by the end of the buffer, when the lead byte is
 * greater than 0xf7, or when str is null while size is non-zero. On failure
 * vec keeps the code points decoded before the offending byte.
 *
 * NOTE: the lead byte is classified by upper bound only and continuation
 * bytes are masked with 0x3f without checking their 10xxxxxx prefix, so
 * malformed input (e.g. a stray 0x80-0xbf byte in lead position) is decoded
 * rather than rejected.
 */
template <class Uint32Container>
bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
  vec.clear();
  // Guard against dereferencing a null buffer; an empty null buffer is still
  // treated as a successful decode of zero code points.
  if(!str) {
    return size == 0;
  }
  uint32_t tmp;
  for(size_t i = 0; i < size;) {
    if(!(str[i] & 0x80)) { // 0xxxxxxx
      // 7bit, total 7bit
      tmp = (uint8_t)(str[i]) & 0x7f;
      i++;
    } else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxx
      // 5bit, total 5bit
      tmp = (uint8_t)(str[i]) & 0x1f;

      // 6bit, total 11bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+1]) & 0x3f;
      i += 2;
    } else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxx
      // 4bit, total 4bit
      tmp = (uint8_t)(str[i]) & 0x0f;

      // 6bit, total 10bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+1]) & 0x3f;

      // 6bit, total 16bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+2]) & 0x3f;

      i += 3;
    } else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 11110xxx
      // 3bit, total 3bit
      tmp = (uint8_t)(str[i]) & 0x07;

      // 6bit, total 9bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+1]) & 0x3f;

      // 6bit, total 15bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+2]) & 0x3f;

      // 6bit, total 21bit
      tmp <<= 6;
      tmp |= (uint8_t)(str[i+3]) & 0x3f;

      i += 4;
    } else {
      // Truncated sequence or unsupported lead byte.
      return false;
    }
    vec.push_back(tmp);
  }
  return true;
}
template <class Uint32Container> template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) { bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
uint32_t tmp; return Utf8ToUnicode32(str.data(), str.size(), vec);
vec.clear(); }
for(size_t i = 0; i < str.size();) {
if(!(str[i] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit
tmp = (uint8_t)(str[i]) & 0x7f;
i++;
} else if((uint8_t)str[i] <= 0xdf && i + 1 < str.size()) { // 110xxxxxx
// 5bit, total 5bit
tmp = (uint8_t)(str[i]) & 0x1f;
// 6bit, total 11bit inline int UnicodeToUtf8Bytes(uint32_t ui){
tmp <<= 6; if(ui <= 0x7f) {
tmp |= (uint8_t)(str[i + 1]) & 0x3f; return 1;
i += 2; } else if(ui <= 0x7ff) {
} else if((uint8_t)str[i] <= 0xef && i + 2 < str.size()) { // 1110xxxxxx return 2;
// 4bit, total 4bit } else if(ui <= 0xffff) {
tmp = (uint8_t)(str[i]) & 0x0f; return 3;
} else {
// 6bit, total 10bit return 4;
tmp <<= 6;
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
// 6bit, total 16bit
tmp <<= 6;
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
i += 3;
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < str.size()) { // 11110xxxx
// 3bit, total 3bit
tmp = (uint8_t)(str[i]) & 0x07;
// 6bit, total 9bit
tmp <<= 6;
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
// 6bit, total 15bit
tmp <<= 6;
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
// 6bit, total 21bit
tmp <<= 6;
tmp |= (uint8_t)(str[i + 3]) & 0x3f;
i += 4;
} else {
return false;
}
vec.push_back(tmp);
} }
return true;
} }
template <class Uint32ContainerConIter> template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) { void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
res.clear(); res.clear();
uint32_t ui; uint32_t ui;
while(begin != end) { while(begin != end) {
ui = *begin; ui = *begin;
if(ui <= 0x7f) { if(ui <= 0x7f) {
res += char(ui); res += char(ui);
} else if(ui <= 0x7ff) { } else if(ui <= 0x7ff) {
res += char(((ui >> 6) & 0x1f) | 0xc0); res += char(((ui >> 6) & 0x1f) | 0xc0);
res += char((ui & 0x3f) | 0x80); res += char((ui & 0x3f) | 0x80);
} else if(ui <= 0xffff) { } else if(ui <= 0xffff) {
res += char(((ui >> 12) & 0x0f) | 0xe0); res += char(((ui >> 12) & 0x0f) | 0xe0);
res += char(((ui >> 6) & 0x3f) | 0x80); res += char(((ui >> 6) & 0x3f) | 0x80);
res += char((ui & 0x3f) | 0x80); res += char((ui & 0x3f) | 0x80);
} else { } else {
res += char(((ui >> 18) & 0x03) | 0xf0); res += char(((ui >> 18) & 0x03) | 0xf0);
res += char(((ui >> 12) & 0x3f) | 0x80); res += char(((ui >> 12) & 0x3f) | 0x80);
res += char(((ui >> 6) & 0x3f) | 0x80); res += char(((ui >> 6) & 0x3f) | 0x80);
res += char((ui & 0x3f) | 0x80); res += char((ui & 0x3f) | 0x80);
}
begin ++;
} }
begin ++;
}
} }
template <class Uint16ContainerConIter> template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) { void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
res.clear(); res.clear();
uint16_t ui; uint16_t ui;
while(begin != end) { while(begin != end) {
ui = *begin; ui = *begin;
if(ui <= 0x7f) { if(ui <= 0x7f) {
res += char(ui); res += char(ui);
} else if(ui <= 0x7ff) { } else if(ui <= 0x7ff) {
res += char(((ui >> 6) & 0x1f) | 0xc0); res += char(((ui>>6) & 0x1f) | 0xc0);
res += char((ui & 0x3f) | 0x80); res += char((ui & 0x3f) | 0x80);
} else { } else {
res += char(((ui >> 12) & 0x0f) | 0xe0); res += char(((ui >> 12) & 0x0f )| 0xe0);
res += char(((ui >> 6) & 0x3f) | 0x80); res += char(((ui>>6) & 0x3f )| 0x80 );
res += char((ui & 0x3f) | 0x80); res += char((ui & 0x3f) | 0x80);
}
begin ++;
} }
begin ++;
}
} }
template <class Uint16Container> template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) { bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
vec.clear(); vec.clear();
if(!str) { if(!str) {
return true;
}
size_t i = 0;
while(i < len) {
if(0 == (str[i] & 0x80)) {
vec.push_back(uint16_t(str[i]));
i++;
} else {
if(i + 1 < len) { //&& (str[i+1] & 0x80))
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff) << 8) | (uint16_t(str[i + 1]) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else {
return false;
}
}
}
return true; return true;
}
size_t i = 0;
while(i < len) {
if(0 == (str[i] & 0x80)) {
vec.push_back(uint16_t(str[i]));
i++;
} else {
if(i + 1 < len) { //&& (str[i+1] & 0x80))
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else {
return false;
}
}
}
return true;
} }
template <class Uint16Container> template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) { bool GBKTrans(const string& str, Uint16Container& vec) {
return GBKTrans(str.c_str(), str.size(), vec); return GBKTrans(str.c_str(), str.size(), vec);
} }
template <class Uint16ContainerConIter> template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) { void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
res.clear(); res.clear();
//pair<char, char> pa; //pair<char, char> pa;
char first, second; char first, second;
while(begin != end) { while(begin != end) {
//pa = uint16ToChar2(*begin); //pa = uint16ToChar2(*begin);
first = ((*begin) >> 8) & 0x00ff; first = ((*begin)>>8) & 0x00ff;
second = (*begin) & 0x00ff; second = (*begin) & 0x00ff;
if(first & 0x80) { if(first & 0x80) {
res += first; res += first;
res += second; res += second;
} else { } else {
res += second; res += second;
}
begin++;
} }
begin++;
}
} }
/* /*
* format example: "%Y-%m-%d %H:%M:%S" * format example: "%Y-%m-%d %H:%M:%S"
*/ */
inline void GetTime(const string& format, string& timeStr) { // inline void GetTime(const string& format, string& timeStr) {
time_t timeNow; // time_t timeNow;
time(&timeNow); // time(&timeNow);
timeStr.resize(64); // timeStr.resize(64);
size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow)); // size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
timeStr.resize(len); // timeStr.resize(len);
} // }
inline string PathJoin(const string& path1, const string& path2) { inline string PathJoin(const string& path1, const string& path2) {
if(EndsWith(path1, "/")) { if(EndsWith(path1, "/")) {
return path1 + path2; return path1 + path2;
} }
return path1 + "/" + path2; return path1 + "/" + path2;
} }
} }

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_THREAD_HPP #ifndef LIMONP_THREAD_HPP
#define LIMONP_THREAD_HPP #define LIMONP_THREAD_HPP
@ -25,36 +7,36 @@
namespace limonp { namespace limonp {
class IThread: NonCopyable { class IThread: NonCopyable {
public: public:
IThread(): isStarted(false), isJoined(false) { IThread(): isStarted(false), isJoined(false) {
}
virtual ~IThread() {
if(isStarted && !isJoined) {
XCHECK(!pthread_detach(thread_));
} }
virtual ~IThread() { };
if(isStarted && !isJoined) {
XCHECK(!pthread_detach(thread_));
}
};
virtual void Run() = 0; virtual void Run() = 0;
void Start() { void Start() {
XCHECK(!isStarted); XCHECK(!isStarted);
XCHECK(!pthread_create(&thread_, NULL, Worker, this)); XCHECK(!pthread_create(&thread_, NULL, Worker, this));
isStarted = true; isStarted = true;
} }
void Join() { void Join() {
XCHECK(!isJoined); XCHECK(!isJoined);
XCHECK(!pthread_join(thread_, NULL)); XCHECK(!pthread_join(thread_, NULL));
isJoined = true; isJoined = true;
} }
private: private:
static void * Worker(void * data) { static void * Worker(void * data) {
IThread * ptr = (IThread*) data; IThread * ptr = (IThread* ) data;
ptr->Run(); ptr->Run();
return NULL; return NULL;
} }
pthread_t thread_; pthread_t thread_;
bool isStarted; bool isStarted;
bool isJoined; bool isJoined;
}; // class IThread }; // class IThread
} // namespace limonp } // namespace limonp

View File

@ -1,21 +1,3 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*
*/
#ifndef LIMONP_THREAD_POOL_HPP #ifndef LIMONP_THREAD_POOL_HPP
#define LIMONP_THREAD_POOL_HPP #define LIMONP_THREAD_POOL_HPP
@ -30,73 +12,73 @@ using namespace std;
//class ThreadPool; //class ThreadPool;
class ThreadPool: NonCopyable { class ThreadPool: NonCopyable {
public: public:
class Worker: public IThread { class Worker: public IThread {
public: public:
Worker(ThreadPool* pool): ptThreadPool_(pool) { Worker(ThreadPool* pool): ptThreadPool_(pool) {
assert(ptThreadPool_); assert(ptThreadPool_);
}
virtual ~Worker() {
}
virtual void Run() {
while(true) {
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
if(closure == NULL) {
break;
}
try {
closure->Run();
} catch(std::exception& e) {
XLOG(ERROR) << e.what();
} catch(...) {
XLOG(ERROR) << " unknown exception.";
}
delete closure;
}
}
private:
ThreadPool * ptThreadPool_;
}; // class Worker
ThreadPool(size_t thread_num)
: threads_(thread_num),
queue_(thread_num) {
assert(thread_num);
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i] = new Worker(this);
}
} }
~ThreadPool() { virtual ~Worker() {
Stop();
} }
void Start() { virtual void Run() {
for(size_t i = 0; i < threads_.size(); i++) { while (true) {
threads_[i]->Start(); ClosureInterface* closure = ptThreadPool_->queue_.Pop();
if (closure == NULL) {
break;
} }
} try {
void Stop() { closure->Run();
for(size_t i = 0; i < threads_.size(); i ++) { } catch(std::exception& e) {
queue_.Push(NULL); XLOG(ERROR) << e.what();
} catch(...) {
XLOG(ERROR) << " unknown exception.";
} }
for(size_t i = 0; i < threads_.size(); i ++) { delete closure;
threads_[i]->Join(); }
delete threads_[i];
}
threads_.clear();
} }
private:
ThreadPool * ptThreadPool_;
}; // class Worker
void Add(ClosureInterface* task) { ThreadPool(size_t thread_num)
assert(task); : threads_(thread_num),
queue_.Push(task); queue_(thread_num) {
assert(thread_num);
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i] = new Worker(this);
} }
}
~ThreadPool() {
Stop();
}
private: void Start() {
friend class Worker; for(size_t i = 0; i < threads_.size(); i++) {
threads_[i]->Start();
}
}
void Stop() {
for(size_t i = 0; i < threads_.size(); i ++) {
queue_.Push(NULL);
}
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i]->Join();
delete threads_[i];
}
threads_.clear();
}
vector<IThread*> threads_; void Add(ClosureInterface* task) {
BoundedBlockingQueue<ClosureInterface*> queue_; assert(task);
queue_.Push(task);
}
private:
friend class Worker;
vector<IThread*> threads_;
BoundedBlockingQueue<ClosureInterface*> queue_;
}; // class ThreadPool }; // class ThreadPool
} // namespace limonp } // namespace limonp

View File

@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(cppjieba/cppjieba.pri) include(cppjieba/cppjieba.pri)
#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
SOURCES += \ SOURCES += \
chinese-segmentation.cpp \ chinese-segmentation.cpp \

View File

@ -46,6 +46,7 @@ AppMatch::AppMatch(QObject *parent) : QThread(parent)
if(!m_interFace->isValid()) { if(!m_interFace->isValid()) {
qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message()); qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
} }
m_interFace->setTimeout(200);
qDebug() << "AppMatch is new"; qDebug() << "AppMatch is new";
} }
@ -227,10 +228,11 @@ void AppMatch::getDesktopFilePath() {
} }
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) { void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
QMap<NameString, QStringList>::const_iterator i; // QMap<NameString, QStringList>::const_iterator i;
for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { // for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
appNameMatch(i.key().app_name, installed); // appNameMatch(i.key().app_name, installed);
} // }
appNameMatch(installed);
qDebug() << "installed app match is successful!"; qDebug() << "installed app match is successful!";
} }
@ -275,12 +277,44 @@ void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &inst
} }
} }
} }
/**
 * @brief Collect every entry of m_installAppMap whose application name matches
 *        the current search text (m_sourceText) into @p installed.
 *
 * An entry matches when, case-insensitively, the search text is contained in
 * the application name itself or in one of the strings returned by
 * FileUtils::findMultiToneWords() for that name. Matching entries are inserted
 * under a key that carries only the application name, with the entry's
 * original value list.
 *
 * @param installed map receiving the matches; existing content is kept.
 */
void AppMatch::appNameMatch(QMap<NameString, QStringList> &installed) {
    // Decides whether a single application name matches m_sourceText.
    const auto isHit = [this](const QString &appName) -> bool {
        if(appName.contains(m_sourceText, Qt::CaseInsensitive)) {
            return true;
        }
        // The returned list is consumed in pairs: even index = full pinyin,
        // odd index = pinyin initials (per the original inline comments).
        const QStringList tones = FileUtils::findMultiToneWords(appName);
        for(int pairIdx = 0; pairIdx < tones.size() / 2; pairIdx++) {
            // Chinese converted to initial letters
            const QString initials = tones.at(2 * pairIdx + 1);
            if(initials.contains(m_sourceText, Qt::CaseInsensitive)) {
                return true;
            }
            // A search text shorter than two characters is never compared
            // against the full pinyin.
            if(m_sourceText.size() < 2) {
                return false;
            }
            // Chinese converted to full pinyin
            const QString fullPinyin = tones.at(2 * pairIdx);
            if(fullPinyin.contains(m_sourceText, Qt::CaseInsensitive)) {
                return true;
            }
        }
        return false;
    };

    NameString matchedKey;
    QMapIterator<NameString, QStringList> cursor(m_installAppMap);
    while(cursor.hasNext()) {
        cursor.next();
        matchedKey.app_name = cursor.key().app_name;
        if(!isHit(cursor.key().app_name)) {
            continue;
        }
        installed.insert(matchedKey, cursor.value());
    }
}
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) { void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
if(m_interFace->timeout() != -1) { // if(m_interFace->timeout() != -1) {
qWarning() << "softWareCente Dbus is timeout !"; // qWarning() << "softWareCente Dbus is timeout !";
return; // return;
} // }
slotDBusCallFinished(softwarereturn); slotDBusCallFinished(softwarereturn);
qDebug() << "softWareCenter match app is successful!"; qDebug() << "softWareCenter match app is successful!";
} }
@ -349,7 +383,7 @@ void AppMatch::run() {
QDir androidPath(QDir::homePath() + "/.local/share/applications/"); QDir androidPath(QDir::homePath() + "/.local/share/applications/");
if(androidPath.exists()) if(androidPath.exists())
this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/"); this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/");
connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) { connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) {
this->getDesktopFilePath(); this->getDesktopFilePath();
if(path == "/usr/share/applications/") { if(path == "/usr/share/applications/") {
this->getAllDesktopFilePath("/usr/share/applications/"); this->getAllDesktopFilePath("/usr/share/applications/");

View File

@ -65,6 +65,7 @@ private:
void getAppName(QMap<NameString, QStringList> &installed); void getAppName(QMap<NameString, QStringList> &installed);
// void appNameMatch(QString appname,QString desktoppath,QString appicon); // void appNameMatch(QString appname,QString desktoppath,QString appicon);
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed); void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
void appNameMatch(QMap<NameString, QStringList> &installed);
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn); void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);

View File

@ -0,0 +1,219 @@
#include "app-search-plugin.h"
#include <gio/gdesktopappinfo.h>
#include <QWidget>
#include <QLabel>
using namespace Zeeker;
size_t AppSearchPlugin::uniqueSymbol = 0;
QMutex AppSearchPlugin::m_mutex;
/**
 * Builds the action lists for installed / not-installed applications,
 * starts the AppMatch worker thread and configures the search thread pool.
 */
AppSearchPlugin::AppSearchPlugin(QObject *parent) : QObject(parent)
{
    // Actions offered for applications that are already installed (type 0).
    m_actionInfo_installed << SearchPluginIface::Actioninfo { 0, tr("Open")}
                           << SearchPluginIface::Actioninfo { 1, tr("Add Shortcut to Desktop")}
                           << SearchPluginIface::Actioninfo { 2, tr("Add Shortcut to Panel")};

    // Single action offered for applications that are not installed (type 1).
    m_actionInfo_not_installed << SearchPluginIface::Actioninfo { 0, tr("Install")};

    AppMatch::getAppMatch()->start();

    m_pool.setMaxThreadCount(2);
    m_pool.setExpiryTimeout(1000);
}
/// Returns the translated display name of this plugin.
const QString AppSearchPlugin::name()
{
    const QString displayName = tr("Applications Search");
    return displayName;
}
/// Returns the translated description of this plugin.
const QString AppSearchPlugin::description()
{
    const QString text = tr("Applications Search");
    return text;
}
/// Returns the translated plugin name.
QString AppSearchPlugin::getPluginName()
{
    QString pluginName = tr("Applications Search");
    return pluginName;
}
/**
 * Starts an asynchronous application search for @p keyword.
 *
 * Every call bumps the shared search counter; the new value is handed to the
 * AppSearch task, which compares it against the counter while enqueueing and
 * stops as soon as a newer search has been started.
 *
 * @param keyword      Text to search for.
 * @param searchResult Queue that receives the results.
 */
void AppSearchPlugin::KeywordSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult)
{
    size_t symbol = 0;
    {
        // Capture the new value while the lock is held: reading the static
        // counter again after unlocking would be an unsynchronized read and
        // could already observe the value of a later search.
        QMutexLocker locker(&m_mutex);
        symbol = ++uniqueSymbol;
    }
    // The task deletes itself after running (auto-delete is set in its constructor).
    AppSearch *appsearch = new AppSearch(searchResult, keyword, symbol);
    m_pool.start(appsearch);
}
/**
 * Returns the actions available for a result of the given type.
 *
 * @param type 0 for installed applications, 1 for applications that are not installed.
 * @return The matching action list, or an empty list for any other type.
 */
QList<SearchPluginIface::Actioninfo> AppSearchPlugin::getActioninfo(int type)
{
    if(type == 0) {
        return m_actionInfo_installed;
    }
    if(type == 1) {
        return m_actionInfo_not_installed;
    }
    return QList<SearchPluginIface::Actioninfo>();
}
/**
 * Executes an action on a search result.
 *
 * @param actionkey For installed apps: 0 launch, 1 add desktop shortcut,
 *                  2 add panel shortcut. Ignored for apps that are not installed.
 * @param key       The result's action key (passed to the action handler).
 * @param type      0 for installed applications, 1 for not installed ones.
 */
void AppSearchPlugin::openAction(int actionkey, QString key, int type)
{
    // Not installed: the only possible action is installing it.
    if(type == 1) {
        if(!installAppAction(key)) {
            qWarning() << "Fail to install:" << key;
        }
        return;
    }

    // Unknown result types are ignored.
    if(type != 0) {
        return;
    }

    if(actionkey == 0) {
        if(!launch(key)) {
            qWarning() << "Fail to launch:" << key;
        }
    } else if(actionkey == 1) {
        if(!addDesktopShortcut(key)) {
            qWarning() << "Fail to add Desktop Shortcut:" << key;
        }
    } else if(actionkey == 2) {
        if(!addPanelShortcut(key)) {
            qWarning() << "Fail to add Panel Shortcut:" << key;
        }
    }
}
/// Application results never provide a preview page.
bool AppSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    return false;
}
/// No preview is available for application results; always returns nullptr.
QWidget *AppSearchPlugin::previewPage(QString key, int type, QWidget *parent = nullptr)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    Q_UNUSED(parent)
    return nullptr;
}
/**
 * Launches the application described by a .desktop file.
 *
 * @param path Path of the .desktop file.
 * @return true if the application was launched, false if the desktop file
 *         could not be loaded or the launch failed.
 */
bool AppSearchPlugin::launch(const QString &path)
{
    GDesktopAppInfo *desktopAppInfo = g_desktop_app_info_new_from_filename(path.toLocal8Bit().constData());
    // NULL is returned when the file is missing or is not a valid desktop
    // entry; it must not be passed on to g_app_info_launch()/g_object_unref().
    if(!desktopAppInfo) {
        return false;
    }
    const bool res = static_cast<bool>(g_app_info_launch(G_APP_INFO(desktopAppInfo), nullptr, nullptr, nullptr));
    g_object_unref(desktopAppInfo);
    return res;
}
bool AppSearchPlugin::addPanelShortcut(const QString& path) {
QDBusInterface iface("com.ukui.panel.desktop",
"/",
"com.ukui.panel.desktop",
QDBusConnection::sessionBus());
if(iface.isValid()) {
QDBusReply<bool> isExist = iface.call("CheckIfExist", path);
if(isExist) {
qWarning() << "Add shortcut to panel failed, because it is already existed!";
return false;
}
QDBusReply<QVariant> ret = iface.call("AddToTaskbar", path);
qDebug() << "Add shortcut to panel successed!";
return true;
}
return false;
}
bool AppSearchPlugin::addDesktopShortcut(const QString& path) {
QString dirpath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation);
QFileInfo fileInfo(path);
QString desktopfn = fileInfo.fileName();
QFile file(path);
QString newName = QString(dirpath + "/" + desktopfn);
bool ret = file.copy(QString(dirpath + "/" + desktopfn));
if(ret) {
QProcess process;
process.startDetached(QString("chmod a+x %1").arg(newName));
return true;
}
return false;
}
bool AppSearchPlugin::installAppAction(const QString & name) {
QDBusInterface * interface = new QDBusInterface("com.kylin.softwarecenter",
"/com/kylin/softwarecenter",
"com.kylin.utiliface",
QDBusConnection::sessionBus());
if(interface->isValid()) {
//软件商店已打开,直接跳转
interface->call("show_search_result", name);
bool reply = QDBusReply<bool>(interface->call(QString("show_search_result"), name));
return reply;
} else {
//软件商店未打开,打开软件商店下载此软件
qDebug() << "Softwarecenter has not been launched, now launch it." << name;
QProcess process;
return process.startDetached(QString("kylin-software-center -find %1").arg(name));
}
}
/**
 * Creates a self-deleting search task.
 *
 * @param searchResult Queue the results are written to.
 * @param keyword      Text to search for.
 * @param uniqueSymbol Value of the plugin's search counter for this search.
 */
AppSearch::AppSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, const QString &keyword, size_t uniqueSymbol)
    : m_search_result(searchResult),
      m_uniqueSymbol(uniqueSymbol),
      m_keyword(keyword)
{
    setAutoDelete(true);
}
/// Nothing to release explicitly; members clean up after themselves.
AppSearch::~AppSearch() = default;
void AppSearch::run()
{
//These weird code is mean to be compatible with the old version UI.
AppMatch::getAppMatch()->startMatchApp(m_keyword, m_installed_apps, m_not_installed_apps);
QMapIterator<NameString, QStringList> i(m_installed_apps);
while (i.hasNext()) {
i.next();
SearchPluginIface::ResultInfo ri;
if(!QIcon::fromTheme(i.value().at(1)).isNull()) {
ri.icon = QIcon::fromTheme(i.value().at(1));
}else {
ri.icon = QIcon(":/res/icons/desktop.png");
}
ri.name = i.key().app_name;
ri.actionKey = i.value().at(0);
ri.type = 0; //0 means installed apps.
if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) {
m_search_result->enqueue(ri);
} else {
break;
}
}
QMapIterator<NameString, QStringList> in(m_not_installed_apps);
while (in.hasNext()) {
in.next();
SearchPluginIface::ResultInfo ri;
if(!QIcon(in.value().at(1)).isNull()) {
ri.icon = QIcon(in.value().at(1));
}else {
ri.icon = QIcon(":/res/icons/desktop.png");
}
ri.name = in.key().app_name;
SearchPluginIface::DescriptionInfo di;
di.key = QString(tr("Application Description:"));
di.value = in.value().at(3);
ri.description.append(di);
ri.actionKey = in.value().at(2);
ri.type = 1; //1 means not installed apps.
if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) {
m_search_result->enqueue(ri);
} else {
break;
}
}
}

View File

@ -0,0 +1,57 @@
#ifndef APPSEARCHPLUGIN_H
#define APPSEARCHPLUGIN_H
#include <QObject>
#include "search-plugin-iface.h"
#include "app-match.h"
#include "libsearch_global.h"
namespace Zeeker {
/**
 * Search plugin for applications.
 *
 * Results carry a type: 0 for installed applications and 1 for applications
 * that are not installed; the available actions depend on that type.
 */
class LIBSEARCH_EXPORT AppSearchPlugin : public QObject, public SearchPluginIface
{
// AppSearch reads the private static uniqueSymbol.
friend class AppSearch;
Q_OBJECT
public:
AppSearchPlugin(QObject *parent = nullptr);
PluginType pluginType() {return PluginType::SearchPlugin;}
const QString name();
const QString description();
const QIcon icon() {return QIcon::fromTheme("appsearch");}
void setEnable(bool enable) {m_enable = enable;}
bool isEnable() {return m_enable;}
QString getPluginName();
// Starts a search for keyword; results are written to searchResult.
void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
// Actions available for a result of the given type.
QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
// Executes action actionkey on the result identified by key.
void openAction(int actionkey, QString key, int type);
bool isPreviewEnable(QString key, int type);
QWidget *previewPage(QString key, int type, QWidget *parent);
private:
// Action handlers; each returns true on success.
bool launch(const QString &path);
bool addPanelShortcut(const QString &path);
bool addDesktopShortcut(const QString &path);
bool installAppAction(const QString &name);
bool m_enable = true;
// Actions for installed (type 0) and not installed (type 1) applications.
QList<SearchPluginIface::Actioninfo> m_actionInfo_installed;
QList<SearchPluginIface::Actioninfo> m_actionInfo_not_installed;
// Pool that runs the AppSearch tasks.
QThreadPool m_pool;
// Search counter, incremented by every KeywordSearch() call.
static size_t uniqueSymbol;
// Held while uniqueSymbol is incremented.
static QMutex m_mutex;
};
/**
 * Runnable that performs one application search and writes the results to a
 * DataQueue. Created by AppSearchPlugin::KeywordSearch().
 */
class AppSearch : public QObject, public QRunnable {
Q_OBJECT
public:
AppSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, const QString& keyword, size_t uniqueSymbol);
~AppSearch();
protected:
void run() override;
private:
// Queue the results are written to.
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
// Search counter value this task was created with.
size_t m_uniqueSymbol;
QString m_keyword;
// Filled by AppMatch::startMatchApp() in run().
QMap<NameString, QStringList> m_installed_apps;
QMap<NameString, QStringList> m_not_installed_apps;
};
}
#endif // APPSEARCHPLUGIN_H

View File

@ -2,6 +2,8 @@ INCLUDEPATH += $$PWD
HEADERS += \ HEADERS += \
$$PWD/app-match.h \ $$PWD/app-match.h \
$$PWD/app-search-plugin.h
SOURCES += \ SOURCES += \
$$PWD/app-match.cpp \ $$PWD/app-match.cpp \
$$PWD/app-search-plugin.cpp

View File

@ -1,7 +1,25 @@
#ifndef COMMON_H #ifndef COMMON_H
#define COMMON_H #define COMMON_H
#include <QMap>
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData() #define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
#define FILE_SEARCH_VALUE "0"
#define DIR_SEARCH_VALUE "1"
#define HOME_PATH QDir::homePath()
// File name suffixes listed as target file types; every listed suffix maps to true.
static const QMap<QString, bool> targetFileTypeMap = {
    {QStringLiteral("doc"),  true},
    {QStringLiteral("docx"), true},
    {QStringLiteral("ppt"),  true},
    {QStringLiteral("pptx"), true},
    {QStringLiteral("xls"),  true},
    {QStringLiteral("xlsx"), true},
    {QStringLiteral("txt"),  true},
    {QStringLiteral("dot"),  true},
    {QStringLiteral("wps"),  true},
    {QStringLiteral("pps"),  true},
    {QStringLiteral("dps"),  true},
    {QStringLiteral("et"),   true},
    {QStringLiteral("pdf"),  true}
};
//TODO Put things that needed to be put here here. //TODO Put things that needed to be put here here.
#endif // COMMON_H #endif // COMMON_H

View File

@ -20,6 +20,7 @@
* *
*/ */
#include "file-utils.h" #include "file-utils.h"
#include <QXmlStreamReader>
using namespace Zeeker; using namespace Zeeker;
size_t FileUtils::_max_index_count = 0; size_t FileUtils::_max_index_count = 0;
@ -177,6 +178,22 @@ QString FileUtils::getSettingName(const QString& setting) {
return setting.right(setting.length() - setting.lastIndexOf("/") - 1); return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
} }
/**
 * Tells whether pathA is the same path as pathB or lies underneath it.
 *
 * Both paths may be given with or without the leading "/" (it is added when
 * missing), so empty strings are treated as the root directory. The test is
 * purely textual; the file system is not consulted.
 *
 * @param pathA Path to test.
 * @param pathB Candidate parent path.
 * @return true if pathA equals pathB or is located below pathB.
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    // startsWith() is safe on empty strings, unlike indexing with [0].
    if(!pathA.startsWith(QLatin1Char('/')))
        pathA.prepend(QLatin1Char('/'));
    if(!pathB.startsWith(QLatin1Char('/')))
        pathB.prepend(QLatin1Char('/'));

    if(pathA == pathB)
        return true;

    // Compare against "<pathB>/" so that "/home/ab" is not reported as being
    // under "/home/a". Do not double the separator when pathB already ends
    // with one (notably the root directory "/").
    const QString parentPrefix = pathB.endsWith(QLatin1Char('/')) ? pathB : pathB + QLatin1Char('/');
    return pathA.startsWith(parentPrefix);
}
void FileUtils::loadHanziTable(const QString &fileName) { void FileUtils::loadHanziTable(const QString &fileName) {
QFile file(fileName); QFile file(fileName);
@ -482,12 +499,30 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
if(!file.open(QuaZip::mdUnzip)) if(!file.open(QuaZip::mdUnzip))
return; return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
file.close();
return; return;
}
QuaZipFile fileR(&file); QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开 fileR.open(QIODevice::ReadOnly); //读取方式打开
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/* //原加载DOM文档方式
QDomDocument doc; QDomDocument doc;
doc.setContent(fileR.readAll()); doc.setContent(fileR.readAll());
fileR.close(); fileR.close();
@ -499,7 +534,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
QDomElement wr = wp.firstChildElement("w:r"); QDomElement wr = wp.firstChildElement("w:r");
while(!wr.isNull()) { while(!wr.isNull()) {
QDomElement wt = wr.firstChildElement("w:t"); QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n", "")); textcontent.append(wt.text().replace("\n", "")).replace("\r", " ");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) { if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close(); file.close();
return; return;
@ -512,6 +547,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
} }
file.close(); file.close();
return; return;
*/
} }
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
@ -527,8 +563,35 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
if(i.startsWith(prefix)) if(i.startsWith(prefix))
fileList << i; fileList << i;
} }
if(fileList.isEmpty()) if(fileList.isEmpty()) {
file.close();
return; return;
}
for(int i = 0; i < fileList.size(); ++i){
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
}
file.close();
return;
/*
QDomElement sptree; QDomElement sptree;
QDomElement sp; QDomElement sp;
QDomElement txbody; QDomElement txbody;
@ -596,6 +659,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
} }
file.close(); file.close();
return; return;
*/
} }
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
@ -606,12 +670,30 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
if(!file.open(QuaZip::mdUnzip)) if(!file.open(QuaZip::mdUnzip))
return; return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
file.close();
return; return;
}
QuaZipFile fileR(&file); QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开 fileR.open(QIODevice::ReadOnly);
QXmlStreamReader reader(&fileR);
while (!reader.atEnd()){
if(reader.readNextStartElement() and reader.name().toString() == "t"){
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
break;
}
}
}
fileR.close();
file.close();
return;
/*
QDomDocument doc; QDomDocument doc;
doc.setContent(fileR.readAll()); doc.setContent(fileR.readAll());
fileR.close(); fileR.close();
@ -641,16 +723,19 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
} }
file.close(); file.close();
return; return;
*/
} }
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) { void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
Poppler::Document *doc = Poppler::Document::load(path); Poppler::Document *doc = Poppler::Document::load(path);
if(doc->isLocked()) if(doc->isLocked()) {
delete doc;
return; return;
}
const QRectF qf; const QRectF qf;
int pageNum = doc->numPages(); int pageNum = doc->numPages();
for(int i = 0; i < pageNum; ++i) { for(int i = 0; i < pageNum; ++i) {
textcontent.append(doc->page(i)->text(qf).replace("\n", "")); textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " "));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
break; break;
} }
@ -679,7 +764,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) {
stream.setCodec(codec); stream.setCodec(codec);
uchardet_delete(chardet); uchardet_delete(chardet);
textcontent = stream.readAll().replace("\n", ""); textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
file.close(); file.close();
encodedString.clear(); encodedString.clear();
@ -688,3 +773,18 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) {
return; return;
} }
/**
 * Opens a file with the desktop's default handler.
 *
 * @param path      Local path of the file.
 * @param openInDir When true, open the text before the last "/" of @p path
 *                  (the containing directory) instead of the file itself.
 * @return The result of QDesktopServices::openUrl().
 */
bool FileUtils::openFile(QString &path, bool openInDir)
{
    const QString target = openInDir ? path.left(path.lastIndexOf("/")) : path;
    return QDesktopServices::openUrl(QUrl::fromLocalFile(target));
}
/**
 * Puts @p path on the application clipboard as plain text.
 *
 * @return Always true.
 */
bool FileUtils::copyPath(QString &path)
{
    QClipboard *clipboard = QApplication::clipboard();
    clipboard->setText(path);
    return true;
}

View File

@ -35,6 +35,9 @@
#include <QMap> #include <QMap>
#include <QDomDocument> #include <QDomDocument>
#include <QQueue> #include <QQueue>
#include <QDesktopServices>
#include <QApplication>
#include <QClipboard>
#include <quazip/quazipfile.h> #include <quazip/quazipfile.h>
#include <stdio.h> #include <stdio.h>
@ -67,6 +70,8 @@ public:
static QString getFileName(const QString &); static QString getFileName(const QString &);
static QString getAppName(const QString &); static QString getAppName(const QString &);
static QString getSettingName(const QString &); static QString getSettingName(const QString &);
//A is or under B
static bool isOrUnder(QString pathA, QString pathB);
//chinese character to pinyin //chinese character to pinyin
static QMap<QString, QStringList> map_chinese2pinyin; static QMap<QString, QStringList> map_chinese2pinyin;
@ -81,6 +86,9 @@ public:
static void getXlsxTextContent(QString &path, QString &textcontent); static void getXlsxTextContent(QString &path, QString &textcontent);
static void getPdfTextContent(QString &path, QString &textcontent); static void getPdfTextContent(QString &path, QString &textcontent);
static void getTxtContent(QString &path, QString &textcontent); static void getTxtContent(QString &path, QString &textcontent);
static bool openFile(QString &path, bool openInDir = false);
static bool copyPath(QString &path);
static size_t _max_index_count; static size_t _max_index_count;
static size_t _current_index_count; //this one has been Abandoned,do not use it. static size_t _current_index_count; //this one has been Abandoned,do not use it.
static unsigned short _index_status; static unsigned short _index_status;

View File

@ -83,13 +83,13 @@ GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent) {
connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) { connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) {
if(key == STYLE_NAME_KEY) { if(key == STYLE_NAME_KEY) {
//当前主题改变时也发出paletteChanged信号通知主界面刷新 //当前主题改变时也发出paletteChanged信号通知主界面刷新
qApp->paletteChanged(qApp->palette());
m_cache.remove(STYLE_NAME_KEY); m_cache.remove(STYLE_NAME_KEY);
m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString()); m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString());
} else if(key == FONT_SIZE_KEY) {
qApp->paletteChanged(qApp->palette()); qApp->paletteChanged(qApp->palette());
} else if(key == FONT_SIZE_KEY) {
m_cache.remove(FONT_SIZE_KEY); m_cache.remove(FONT_SIZE_KEY);
m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble()); m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble());
qApp->paletteChanged(qApp->palette());
} else if (key == ICON_THEME_KEY) { } else if (key == ICON_THEME_KEY) {
qApp->paletteChanged(qApp->palette()); qApp->paletteChanged(qApp->palette());
} }
@ -146,24 +146,28 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
m_block_dirs_settings->remove(path); m_block_dirs_settings->remove(path);
return true; return true;
} }
if(!path.startsWith("/home")) { // if(!path.startsWith("/home")) {
// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!")); // returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
returnCode = PATH_NOT_IN_HOME; // returnCode = PATH_NOT_IN_HOME;
return false; // return false;
} // }
//why QSetting's key can't start with "/"?? //why QSetting's key can't start with "/"??
QString pathKey = path.right(path.length() - 1); QString pathKey = path.right(path.length() - 1);
if (pathKey.endsWith(QLatin1Char('/'))) {
pathKey = pathKey.mid(0, pathKey.length() - 1);
}
QStringList blockDirs = m_block_dirs_settings->allKeys(); QStringList blockDirs = m_block_dirs_settings->allKeys();
for(QString i : blockDirs) { for(QString i : blockDirs) {
if(pathKey.startsWith(i)) { if(FileUtils::isOrUnder(pathKey, i)) {
// returnCode = QString(tr("My parent folder has been blocked!")); // returnCode = QString(tr("My parent folder has been blocked!"));
returnCode = PATH_PARENT_BLOCKED; returnCode = PATH_PARENT_BLOCKED;
return false; return false;
} }
if(i.startsWith(pathKey)) if(FileUtils::isOrUnder(i, pathKey))
m_block_dirs_settings->remove(i); m_block_dirs_settings->remove(i);
} }
m_block_dirs_settings->setValue(pathKey, "0"); m_block_dirs_settings->setValue(pathKey, "0");

View File

@ -36,6 +36,7 @@
#include <QDBusInterface> #include <QDBusInterface>
#include <QApplication> #include <QApplication>
#include "libsearch_global.h" #include "libsearch_global.h"
#include "file-utils.h"
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise" #define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
#define TRANSPARENCY_KEY "transparency" #define TRANSPARENCY_KEY "transparency"

View File

@ -34,8 +34,8 @@ ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list) {
void ConstructDocumentForPath::run() { void ConstructDocumentForPath::run() {
// qDebug()<<"ConstructDocumentForPath"; // qDebug()<<"ConstructDocumentForPath";
if(!Zeeker::_doc_list_path) // if(!Zeeker::_doc_list_path)
Zeeker::_doc_list_path = new QList<Document>; // Zeeker::_doc_list_path = new QVector<Document>;
// qDebug()<<_doc_list_path->size(); // qDebug()<<_doc_list_path->size();
QString index_text = m_list.at(0).toLower(); QString index_text = m_list.at(0).toLower();
QString sourcePath = m_list.at(1); QString sourcePath = m_list.at(1);
@ -87,9 +87,9 @@ void ConstructDocumentForPath::run() {
} }
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc)); // QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
Zeeker::_mutex_doc_list_path.lock(); IndexGenerator::_mutex_doc_list_path.lock();
Zeeker::_doc_list_path->append(doc); IndexGenerator::_doc_list_path.append(doc);
Zeeker::_mutex_doc_list_path.unlock(); IndexGenerator::_mutex_doc_list_path.unlock();
// qDebug()<<"ConstructDocumentForPath finish"; // qDebug()<<"ConstructDocumentForPath finish";
return; return;
} }
@ -102,32 +102,39 @@ ConstructDocumentForContent::ConstructDocumentForContent(QString path) {
void ConstructDocumentForContent::run() { void ConstructDocumentForContent::run() {
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId(); // qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
// 构造文本索引的document // 构造文本索引的document
if(!Zeeker::_doc_list_content) // if(!Zeeker::_doc_list_content)
Zeeker::_doc_list_content = new QList<Document>; // Zeeker::_doc_list_content = new QVector<Document>;
QString content; QString content;
FileReader::getTextContent(m_path, content); FileReader::getTextContent(m_path, content);
if(content.isEmpty()) if(content.isEmpty())
return; return;
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); //QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); //QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString());
Document doc; Document doc;
doc.setData(content); doc.setData(content);
doc.setUniqueTerm(uniqueterm); //doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm); doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
//doc.addTerm(upTerm);
doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
doc.addValue(m_path); doc.addValue(m_path);
for(int i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
for(size_t i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
} }
Zeeker::_mutex_doc_list_content.lock(); IndexGenerator::_mutex_doc_list_content.lock();
Zeeker::_doc_list_content->append(doc); IndexGenerator::_doc_list_content.append(doc);
Zeeker::_mutex_doc_list_content.unlock(); IndexGenerator::_mutex_doc_list_content.unlock();
content.clear(); content.clear();
content.squeeze(); content.squeeze();
term.clear(); term.clear();
term.shrink_to_fit();
return; return;
} }

View File

@ -0,0 +1,6 @@
#include "data-queue.h"
// Placeholder constructor: there is nothing to initialise.
DataQueue::DataQueue() = default;

View File

@ -0,0 +1,11 @@
#ifndef DATAQUEUE_H
#define DATAQUEUE_H
// Empty placeholder class; it only declares a default constructor.
class DataQueue
{
public:
DataQueue();
};
#endif // DATAQUEUE_H

View File

@ -37,6 +37,17 @@ void Document::addPosting(std::string term, QVector<size_t> offset, int weight)
} }
} }
void Document::addPosting(std::string &term, std::vector<size_t> &offset, int weight) {
if(term == "")
return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
for(size_t i : offset) {
m_document.add_posting(term, i, weight);
}
}
void Document::addPosting(std::string term, unsigned int offset, int weight) { void Document::addPosting(std::string term, unsigned int offset, int weight) {
if(term == "") if(term == "")
return; return;
@ -52,6 +63,12 @@ void Document::addTerm(QString term) {
m_document.add_term(term.toStdString()); m_document.add_term(term.toStdString());
} }
/// Adds @p term to the Xapian document; empty terms are ignored.
void Document::addTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
    }
}
void Document::addValue(QString value) { void Document::addValue(QString value) {
m_document.add_value(1, value.toStdString()); m_document.add_value(1, value.toStdString());
} }
@ -62,12 +79,20 @@ void Document::setUniqueTerm(QString term) {
m_document.add_term(term.toStdString()); m_document.add_term(term.toStdString());
// m_unique_term = new QString(term); // m_unique_term = new QString(term);
m_unique_term = std::move(term); m_unique_term = std::move(term.toStdString());
} }
/**
 * Adds @p term to the Xapian document and remembers it as the document's
 * unique term. An empty term leaves the document untouched.
 */
void Document::setUniqueTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
        m_unique_term = term;
    }
}
std::string Document::getUniqueTerm() { std::string Document::getUniqueTerm() {
// qDebug()<<"m_unique_term!"<<*m_unique_term; // qDebug()<<"m_unique_term!"<<*m_unique_term;
// qDebug() << QString::fromStdString(m_unique_term.toStdString()); // qDebug() << QString::fromStdString(m_unique_term.toStdString());
return m_unique_term.toStdString(); return m_unique_term;//.toStdString();
} }
void Document::setIndexText(QStringList indexText) { void Document::setIndexText(QStringList indexText) {

View File

@ -41,10 +41,13 @@ public:
} }
void setData(QString &data); void setData(QString &data);
void addPosting(std::string term, QVector<size_t> offset, int weight = 1); void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
void addPosting(std::string &term, std::vector<size_t> &offset, int weight = 1);
void addPosting(std::string term, unsigned int offset, int weight = 1); void addPosting(std::string term, unsigned int offset, int weight = 1);
void addTerm(QString term); void addTerm(QString term);
void addTerm(std::string term);
void addValue(QString value); void addValue(QString value);
void setUniqueTerm(QString term); void setUniqueTerm(QString term);
void setUniqueTerm(std::string term);
std::string getUniqueTerm(); std::string getUniqueTerm();
void setIndexText(QStringList indexText); void setIndexText(QStringList indexText);
QStringList getIndexText(); QStringList getIndexText();
@ -52,7 +55,8 @@ public:
private: private:
Xapian::Document m_document; Xapian::Document m_document;
QStringList m_index_text; QStringList m_index_text;
QString m_unique_term; //QString m_unique_term;
std::string m_unique_term;
}; };
} }

View File

@ -0,0 +1,232 @@
#include "file-search-plugin.h"
#include "search-manager.h"
#include <QWidget>
#include <QLabel>
#include <QHBoxLayout>
using namespace Zeeker;
/// Registers the three file actions and configures the search thread pool.
FileSearchPlugin::FileSearchPlugin(QObject *parent) : QObject(parent)
{
    m_actionInfo << SearchPluginIface::Actioninfo { 0, tr("Open")}
                 << SearchPluginIface::Actioninfo { 1, tr("Open path")}
                 << SearchPluginIface::Actioninfo { 2, tr("Copy Path")};

    m_pool.setMaxThreadCount(2);
    m_pool.setExpiryTimeout(1000);
}
/// Returns the translated display name of this plugin.
const QString FileSearchPlugin::name()
{
    const QString displayName = tr("File Search");
    return displayName;
}
/// Returns the translated description of this plugin.
const QString FileSearchPlugin::description()
{
    const QString text = tr("File search.");
    return text;
}
/// Returns the translated plugin name.
QString FileSearchPlugin::getPluginName()
{
    QString pluginName = tr("File Search");
    return pluginName;
}
/**
 * Starts an asynchronous file search for @p keyword.
 *
 * Every call bumps SearchManager::uniqueSymbol1 and hands the new value to
 * the search task. Depending on FileUtils::searchMethod either a direct
 * search or an index search is started; for any other method nothing is started.
 *
 * @param keyword      Text to search for.
 * @param searchResult Queue that receives the results.
 */
void Zeeker::FileSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    // Capture the new counter value while the lock is held; reading the
    // static again after unlocking would be an unsynchronized read.
    SearchManager::m_mutex1.lock();
    const auto symbol = ++SearchManager::uniqueSymbol1;
    SearchManager::m_mutex1.unlock();

    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        DirectSearch *directSearch;
        directSearch = new DirectSearch(keyword, searchResult, FILE_SEARCH_VALUE, symbol);
        m_pool.start(directSearch);
    } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        FileSearch *filesearch;
        filesearch = new FileSearch(searchResult, symbol, keyword, FILE_SEARCH_VALUE, 1, 0, 5);
        m_pool.start(filesearch);
    }
}
/// Returns the action list; it is the same for every result type.
QList<SearchPluginIface::Actioninfo> FileSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type)
    return m_actionInfo;
}
void FileSearchPlugin::openAction(int actionkey, QString key, int type)
{
//TODO add some return message here.
switch (actionkey) {
case 0:
FileUtils::openFile(key);
break;
case 1:
FileUtils::openFile(key, true);
case 2:
FileUtils::copyPath(key);
default:
break;
}
}
// Always reports that a preview is available, whatever the key or type.
bool FileSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    return true;
}
// Builds a fixed 120x120 preview widget showing the icon of the file `key`.
//
// key    - local file path; converted to a URL string before the icon lookup.
// type   - unused.
// parent - Qt parent of the returned widget.
//
// Returns the newly created widget, which is parented to `parent`.
QWidget *FileSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(type)
    QWidget *page = new QWidget(parent);
    QHBoxLayout *pageLayout = new QHBoxLayout(page);
    pageLayout->setContentsMargins(0, 0, 0, 0);

    QLabel *iconLabel = new QLabel(page);
    pageLayout->addWidget(iconLabel);
    iconLabel->setFixedHeight(120);
    page->setFixedSize(120,120);
    pageLayout->setAlignment(Qt::AlignCenter);

    // FileUtils::getFileIcon() is given the URL form of the path.
    const QString fileUri = QUrl::fromLocalFile(key).toString();
    iconLabel->setPixmap(FileUtils::getFileIcon(fileUri).pixmap(120,120));
    return page;
}
DirSearchPlugin::DirSearchPlugin(QObject *parent) : QObject(parent)
{
    // Actions offered for a result; the integer ids are the values that
    // openAction() switches on.
    m_actionInfo << SearchPluginIface::Actioninfo { 0, tr("Open")}
                 << SearchPluginIface::Actioninfo { 1, tr("Open path")}
                 << SearchPluginIface::Actioninfo { 2, tr("Copy Path")};

    // Private pool for this plugin's search runnables: at most two worker
    // threads, with an expiry timeout of 1000 (milliseconds per the Qt docs).
    m_pool.setExpiryTimeout(1000);
    m_pool.setMaxThreadCount(2);
}
// Translated display name of the plugin.
const QString DirSearchPlugin::name()
{
    const QString displayName = tr("Dir Search");
    return displayName;
}
// Translated one-line description of the plugin.
const QString DirSearchPlugin::description()
{
    const QString summary = tr("Dir search.");
    return summary;
}
// Translated plugin name; yields the same string as name().
QString DirSearchPlugin::getPluginName()
{
    QString pluginName = tr("Dir Search");
    return pluginName;
}
// Starts an asynchronous directory-name search for `keyword`.
//
// keyword      - text to search for.
// searchResult - queue handed to the search runnable, which is expected to
//                push its results into it.
//
// Every call bumps SearchManager::uniqueSymbol2 under m_mutex2. The new
// value is captured while the lock is still held and passed to the runnable.
// Previously the counter was re-read after unlock(), so a concurrent call
// could hand this search the id of a newer one.
// NOTE(review): presumably the runnables compare this id against the counter
// to detect that they are stale - confirm in DirectSearch / FileSearch.
void Zeeker::DirSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    SearchManager::m_mutex2.lock();
    const auto searchId = ++SearchManager::uniqueSymbol2;
    SearchManager::m_mutex2.unlock();

    // The runnable is handed to the pool; presumably auto-deleted by
    // QThreadPool once it has run (QRunnable default) - TODO confirm.
    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        m_pool.start(new DirectSearch(keyword, searchResult, DIR_SEARCH_VALUE, searchId));
    } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        // Trailing 1, 0, 5 are passed through unchanged; their meaning is
        // defined by the FileSearch constructor.
        m_pool.start(new FileSearch(searchResult, searchId, keyword, DIR_SEARCH_VALUE, 1, 0, 5));
    }
}
// Returns the action list built in the constructor; `type` is ignored.
QList<SearchPluginIface::Actioninfo> DirSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type)
    return m_actionInfo;
}
void DirSearchPlugin::openAction(int actionkey, QString key, int type)
{
//TODO add some return message here.
switch (actionkey) {
case 0:
FileUtils::openFile(key);
break;
case 1:
FileUtils::openFile(key, true);
case 2:
FileUtils::copyPath(key);
default:
break;
}
}
// Directory results never offer a preview, whatever the key or type.
bool DirSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    return false;
}
// No preview widget is provided (isPreviewEnable() returns false); always
// returns nullptr and ignores all arguments.
QWidget *DirSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    Q_UNUSED(parent)
    return nullptr;
}
FileContengSearchPlugin::FileContengSearchPlugin(QObject *parent) : QObject(parent)
{
    // Actions offered for a result; the integer ids are the values that
    // openAction() switches on.
    m_actionInfo << SearchPluginIface::Actioninfo { 0, tr("Open")}
                 << SearchPluginIface::Actioninfo { 1, tr("Open path")}
                 << SearchPluginIface::Actioninfo { 2, tr("Copy Path")};

    // Private pool for this plugin's search runnables: at most two worker
    // threads, with an expiry timeout of 1000 (milliseconds per the Qt docs).
    m_pool.setExpiryTimeout(1000);
    m_pool.setMaxThreadCount(2);
}
// Translated display name of the plugin.
const QString FileContengSearchPlugin::name()
{
    const QString displayName = tr("File Content Search");
    return displayName;
}
// Translated one-line description of the plugin.
const QString FileContengSearchPlugin::description()
{
    const QString summary = tr("File content search.");
    return summary;
}
// Translated plugin name.
// NOTE(review): the source string is capitalised differently from the one in
// name() ("File content search" vs "File Content Search") - confirm whether
// that is intentional before unifying, since it is a translation key.
QString FileContengSearchPlugin::getPluginName()
{
    QString pluginName = tr("File content search");
    return pluginName;
}
// Starts an asynchronous file-content search for `keyword`.
//
// keyword      - text to search for.
// searchResult - queue handed to the search runnable, which is expected to
//                push its results into it.
//
// Every call bumps SearchManager::uniqueSymbol3 under m_mutex3, even when no
// search is started. The new value is captured while the lock is still held
// and passed to the runnable. Previously the counter was re-read after
// unlock(), so a concurrent call could hand this search the id of a newer one.
// NOTE(review): presumably the runnable compares this id against the counter
// to detect that it is stale - confirm in FileContentSearch.
void Zeeker::FileContengSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    SearchManager::m_mutex3.lock();
    const auto searchId = ++SearchManager::uniqueSymbol3;
    SearchManager::m_mutex3.unlock();

    // Content search is only started in index mode; direct-search mode
    // (and any other mode) does nothing here.
    if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        // Trailing 0, 5 are passed through unchanged; their meaning is
        // defined by the FileContentSearch constructor.
        // The runnable is presumably auto-deleted by QThreadPool once it has
        // run (QRunnable default) - TODO confirm.
        m_pool.start(new FileContentSearch(searchResult, searchId, keyword, 0, 5));
    }
}
// Returns the action list built in the constructor; `type` is ignored.
QList<SearchPluginIface::Actioninfo> FileContengSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type)
    return m_actionInfo;
}
void FileContengSearchPlugin::openAction(int actionkey, QString key, int type)
{
//TODO add some return message here.
switch (actionkey) {
case 0:
FileUtils::openFile(key);
break;
case 1:
FileUtils::openFile(key, true);
case 2:
FileUtils::copyPath(key);
default:
break;
}
}
// Content-search results never offer a preview, whatever the key or type.
bool FileContengSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    return false;
}
// No preview widget is provided (isPreviewEnable() returns false); always
// returns nullptr and ignores all arguments.
QWidget *FileContengSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(key)
    Q_UNUSED(type)
    Q_UNUSED(parent)
    return nullptr;
}

View File

@ -0,0 +1,87 @@
#ifndef FILESEARCHPLUGIN_H
#define FILESEARCHPLUGIN_H
#include <QObject>
#include <QThreadPool>
#include "libsearch_global.h"
#include "search-plugin-iface.h"
#include "common.h"
namespace Zeeker {
//internal plugin
// Built-in search plugin; implements SearchPluginIface on top of QObject.
// Member functions are defined in file-search-plugin.cpp.
class LIBSEARCH_EXPORT FileSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    FileSearchPlugin(QObject *parent = nullptr);

    // Identifies this object as a search plugin.
    PluginType pluginType()
    {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    // Themed "folder" icon.
    const QIcon icon()
    {
        return QIcon::fromTheme("folder");
    }
    // Stores / reports the enabled flag; defaults to true.
    void setEnable(bool enable)
    {
        m_enable = enable;
    }
    bool isEnable()
    {
        return m_enable;
    }
    QString getPluginName();

    // Starts a search for `keyword`; `searchResult` is handed to the search runnable.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    // Actions available on a result, and the dispatcher that runs one.
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    // Preview support for a result.
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;
    QList<SearchPluginIface::Actioninfo> m_actionInfo;   // filled in the constructor
    QThreadPool m_pool;                                  // runs this plugin's search runnables
};
// Built-in search plugin; implements SearchPluginIface on top of QObject.
// Member functions are defined in file-search-plugin.cpp.
class LIBSEARCH_EXPORT DirSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    DirSearchPlugin(QObject *parent = nullptr);

    // Identifies this object as a search plugin.
    PluginType pluginType()
    {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    // Themed "folder" icon.
    const QIcon icon()
    {
        return QIcon::fromTheme("folder");
    }
    // Stores / reports the enabled flag; defaults to true.
    void setEnable(bool enable)
    {
        m_enable = enable;
    }
    bool isEnable()
    {
        return m_enable;
    }
    QString getPluginName();

    // Starts a search for `keyword`; `searchResult` is handed to the search runnable.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    // Actions available on a result, and the dispatcher that runs one.
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    // Preview support for a result.
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;
    QList<SearchPluginIface::Actioninfo> m_actionInfo;   // filled in the constructor
    QThreadPool m_pool;                                  // runs this plugin's search runnables
};
// Built-in search plugin; implements SearchPluginIface on top of QObject.
// Member functions are defined in file-search-plugin.cpp.
class LIBSEARCH_EXPORT FileContengSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    FileContengSearchPlugin(QObject *parent = nullptr);

    // Identifies this object as a search plugin.
    PluginType pluginType()
    {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    // Themed "folder" icon.
    const QIcon icon()
    {
        return QIcon::fromTheme("folder");
    }
    // Stores / reports the enabled flag; defaults to true.
    void setEnable(bool enable)
    {
        m_enable = enable;
    }
    bool isEnable()
    {
        return m_enable;
    }
    QString getPluginName();

    // Starts a search for `keyword`; `searchResult` is handed to the search runnable.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    // Actions available on a result, and the dispatcher that runs one.
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    // Preview support for a result.
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;
    QList<SearchPluginIface::Actioninfo> m_actionInfo;   // filled in the constructor
    QThreadPool m_pool;                                  // runs this plugin's search runnables
};
}
#endif // FILESEARCHPLUGIN_H

View File

@ -26,6 +26,8 @@
//#define DELETE_QUEUE(a ) //#define DELETE_QUEUE(a )
using namespace Zeeker; using namespace Zeeker;
FirstIndex::FirstIndex() { FirstIndex::FirstIndex() {
m_pool.setMaxThreadCount(2);
m_pool.setExpiryTimeout(100);
} }
FirstIndex::~FirstIndex() { FirstIndex::~FirstIndex() {
@ -46,7 +48,48 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0"); // qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")); this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) { if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
this->q_content_index->enqueue(fileInfo.absoluteFilePath()); //this->q_content_index->enqueue(fileInfo.absoluteFilePath());
if (fileInfo.fileName().split(".").last() == "docx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
file.close();
} else if (fileInfo.fileName().split(".").last() == "pptx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
qint64 fileSize(0);
qint64 fileIndex(0);
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix)){
QString name = prefix + QString::number(fileIndex + 1) + ".xml";
fileIndex++;
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
fileSize += fileR.usize();
}
}
file.close();
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
} else if (fileInfo.fileName().split(".").last() == "xlsx") {
QuaZip file(fileInfo.absoluteFilePath());
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
file.close();
} else {
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
}
} }
} }
@ -54,19 +97,6 @@ void FirstIndex::run() {
QTime t1 = QTime::currentTime(); QTime t1 = QTime::currentTime();
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running. // Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
if(!fifoDir.exists())
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
unlink(UKUI_SEARCH_PIPE_PATH);
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
if(retval == -1) {
qCritical() << "creat fifo error!!";
syslog(LOG_ERR, "creat fifo error!!\n");
assert(false);
return;
}
qDebug() << "create fifo success\n";
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString(); QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString(); QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
@ -90,8 +120,9 @@ void FirstIndex::run() {
this->q_index = new QQueue<QVector<QString>>(); this->q_index = new QQueue<QVector<QString>>();
//this->q_content_index = new QQueue<QString>(); //this->q_content_index = new QQueue<QString>();
NEW_QUEUE(this->q_content_index); //NEW_QUEUE(this->q_content_index);
// this->mlm = new MessageListManager(); // this->mlm = new MessageListManager();
this->q_content_index = new QQueue<QPair<QString,qint64>>();
int fifo_fd; int fifo_fd;
char buffer[2]; char buffer[2];
@ -110,7 +141,6 @@ void FirstIndex::run() {
++FileUtils::_index_status; ++FileUtils::_index_status;
pid_t pid; pid_t pid;
pid = fork(); pid = fork();
if(pid == 0) { if(pid == 0) {
@ -129,6 +159,7 @@ void FirstIndex::run() {
p_indexGenerator = IndexGenerator::getInstance(true, this); p_indexGenerator = IndexGenerator::getInstance(true, this);
} }
//TODO Fix these weird code.
QSemaphore sem(5); QSemaphore sem(5);
QMutex mutex1, mutex2, mutex3; QMutex mutex1, mutex2, mutex3;
mutex1.lock(); mutex1.lock();
@ -144,40 +175,53 @@ void FirstIndex::run() {
qDebug() << "max_index_count:" << FileUtils::_max_index_count; qDebug() << "max_index_count:" << FileUtils::_max_index_count;
sem.release(5); sem.release(5);
// }); // });
QtConcurrent::run([&]() { QtConcurrent::run(&m_pool, [&]() {
sem.acquire(2); sem.acquire(2);
mutex2.unlock(); mutex2.unlock();
qDebug() << "index start;"; qDebug() << "index start;";
QQueue<QVector<QString>>* tmp = new QQueue<QVector<QString>>(); QQueue<QVector<QString>>* tmp1 = new QQueue<QVector<QString>>();
while(!this->q_index->empty()) { while(!this->q_index->empty()) {
for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) { for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) {
tmp->enqueue(this->q_index->dequeue()); tmp1->enqueue(this->q_index->dequeue());
} }
this->p_indexGenerator->creatAllIndex(tmp); this->p_indexGenerator->creatAllIndex(tmp1);
tmp->clear(); tmp1->clear();
} }
// this->p_indexGenerator->setSynonym(); delete tmp1;
delete tmp;
qDebug() << "index end;"; qDebug() << "index end;";
sem.release(2); sem.release(2);
}); });
QtConcurrent::run([&]() { QtConcurrent::run(&m_pool,[&]() {
sem.acquire(2); sem.acquire(2);
mutex3.unlock(); mutex3.unlock();
QQueue<QString>* tmp = new QQueue<QString>(); QQueue<QString>* tmp2 = new QQueue<QString>();
qDebug() << "q_content_index:" << q_content_index->size(); qDebug() << "q_content_index:" << q_content_index->size();
while(!this->q_content_index->empty()) { while(!this->q_content_index->empty()) {
// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){ // for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
for(size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i) { qint64 fileSize = 0;
tmp->enqueue(this->q_content_index->dequeue()); //修改一次处理的数据量从30个文件改为文件总大小为50M以下50M为暂定值--jxx20210519
for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->q_content_index->empty()); ++i) {
QPair<QString,qint64> tempPair = this->q_content_index->dequeue();
fileSize += tempPair.second;
if (fileSize > 52428800 ) {
if (tmp2->size() == 0) {
tmp2->enqueue(tempPair.first);
break;
}
this->q_content_index->enqueue(tempPair);
break;
}
tmp2->enqueue(tempPair.first);
} }
this->p_indexGenerator->creatAllIndex(tmp); // qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << "<<<<<<<<<<<<<<<<<<<";
tmp->clear(); this->p_indexGenerator->creatAllIndex(tmp2);
tmp2->clear();
} }
delete tmp; delete tmp2;
qDebug() << "content index end;"; qDebug() << "content index end;";
sem.release(2); sem.release(2);
}); });
mutex1.lock(); mutex1.lock();
mutex2.lock(); mutex2.lock();
mutex3.lock(); mutex3.lock();

View File

@ -56,13 +56,16 @@ private:
bool bool_dataBaseStatusOK = false; bool bool_dataBaseStatusOK = false;
bool bool_dataBaseExist = false; bool bool_dataBaseExist = false;
IndexGenerator* p_indexGenerator = nullptr; IndexGenerator* p_indexGenerator = nullptr;
QThreadPool m_pool;
//here should be refact //here should be refact
// MessageListManager* mlm; // MessageListManager* mlm;
//test //test
QQueue<QVector<QString>>* q_index; QQueue<QVector<QString>>* q_index;
QQueue<QString>* q_content_index; // QQueue<QString>* q_content_index;
//修改QQueue存储数据为QPair<QString,qint64>,增加存储文件大小数据便于处理时统计--jxx20210519
QQueue<QPair<QString,qint64>>* q_content_index;
const QMap<QString, bool> targetFileTypeMap = { const QMap<QString, bool> targetFileTypeMap = {
std::map<QString, bool>::value_type("doc", true), std::map<QString, bool>::value_type("doc", true),

View File

@ -28,9 +28,8 @@
#include "file-utils.h" #include "file-utils.h"
#include "index-generator.h" #include "index-generator.h"
#include "chinese-segmentation.h" #include "chinese-segmentation.h"
#include "construct-document.h"
#include <QStandardPaths> #include <QStandardPaths>
#include <malloc.h>
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
@ -39,10 +38,14 @@ using namespace Zeeker;
static IndexGenerator *global_instance = nullptr; static IndexGenerator *global_instance = nullptr;
QMutex IndexGenerator::m_mutex; QMutex IndexGenerator::m_mutex;
QList<Document> *Zeeker::_doc_list_path; //QVector<Document> *Zeeker::_doc_list_path;
QMutex Zeeker::_mutex_doc_list_path; //QMutex Zeeker::_mutex_doc_list_path;
QList<Document> *Zeeker::_doc_list_content; //QVector<Document> *Zeeker::_doc_list_content;
QMutex Zeeker::_mutex_doc_list_content; //QMutex Zeeker::_mutex_doc_list_content;
QMutex IndexGenerator::_mutex_doc_list_path;
QMutex IndexGenerator::_mutex_doc_list_content;
QVector<Document> IndexGenerator::_doc_list_path = QVector<Document>();
QVector<Document> IndexGenerator::_doc_list_content = QVector<Document>();
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) { IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) {
QMutexLocker locker(&m_mutex); QMutexLocker locker(&m_mutex);
@ -61,45 +64,33 @@ bool IndexGenerator::setIndexdataPath() {
//文件名索引 //文件名索引
bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList) { bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList) {
// FileUtils::_index_status |= 0x1;
// qDebug() << messageList->size();
HandlePathList(messageList); HandlePathList(messageList);
if(_doc_list_path == NULL) { // if(_doc_list_path == NULL) {
// return false;
// }
if(IndexGenerator::_doc_list_path.isEmpty()) {
return false; return false;
} }
qDebug() << "begin creatAllIndex"; qDebug() << "begin creatAllIndex";
// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "0");
try { try {
// m_indexer = new Xapian::TermGenerator(); for(auto i : IndexGenerator::_doc_list_path) {
// m_indexer.set_database(*m_database_path);
//可以实现拼写纠正
// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING);
// m_indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
// int count =0;
for(auto i : *_doc_list_path) {
insertIntoDatabase(i); insertIntoDatabase(i);
// if(++count > 8999){
// count = 0;
// m_database_path->commit();
// }
} }
m_database_path->commit(); m_database_path->commit();
} catch(const Xapian::Error &e) { } catch(const Xapian::Error &e) {
qWarning() << "creatAllIndex fail!" << QString::fromStdString(e.get_description()); qWarning() << "creatAllIndex fail!" << QString::fromStdString(e.get_description());
//need a record //need a record
IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1"); IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1");
// FileUtils::_index_status &= ~0x1;
assert(false); assert(false);
} }
// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
qDebug() << "finish creatAllIndex"; qDebug() << "finish creatAllIndex";
// FileUtils::_index_status &= ~0x1; IndexGenerator::_doc_list_path.clear();
_doc_list_path->clear(); IndexGenerator::_doc_list_path.squeeze();
delete _doc_list_path; QVector<Document>().swap(IndexGenerator::_doc_list_path);
_doc_list_path = nullptr;
// delete _doc_list_path;
// _doc_list_path = nullptr;
return true; return true;
} }
//文件内容索引 //文件内容索引
@ -107,16 +98,19 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
// FileUtils::_index_status |= 0x2; // FileUtils::_index_status |= 0x2;
HandlePathList(messageList); HandlePathList(messageList);
qDebug() << "begin creatAllIndex for content"; qDebug() << "begin creatAllIndex for content";
if(_doc_list_content == NULL) { // if(_doc_list_content == NULL) {
// return false;
// }
if(IndexGenerator::_doc_list_content.isEmpty()) {
return false; return false;
} }
int size = _doc_list_content->size(); int size = IndexGenerator::_doc_list_content.size();
qDebug() << "begin creatAllIndex for content" << size; qDebug() << "begin creatAllIndex for content" << size;
if(!size == 0) { if(!size == 0) {
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0"); // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0");
try { try {
int count = 0; int count = 0;
for(auto i : *_doc_list_content) { for(auto i : IndexGenerator::_doc_list_content) {
insertIntoContentDatabase(i); insertIntoContentDatabase(i);
if(++count > 999) { if(++count > 999) {
count = 0; count = 0;
@ -133,9 +127,11 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
// FileUtils::_index_status &= ~0x2; // FileUtils::_index_status &= ~0x2;
qDebug() << "finish creatAllIndex for content"; qDebug() << "finish creatAllIndex for content";
_doc_list_content->clear();
delete _doc_list_content; IndexGenerator::_doc_list_content.clear();
_doc_list_content = nullptr; IndexGenerator::_doc_list_content.squeeze();
QVector<Document>().swap(IndexGenerator::_doc_list_content);
malloc_trim(0);
} }
Q_EMIT this->transactionFinished(); Q_EMIT this->transactionFinished();
return true; return true;
@ -297,7 +293,7 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList) {
return; return;
} }
//deprecated
Document IndexGenerator::GenerateDocument(const QVector<QString> &list) { Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
Document doc; Document doc;
// qDebug()<<QString::number(quintptr(QThread::currentThreadId())); // qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
@ -342,7 +338,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
return doc; return doc;
} }
//deprecated
Document IndexGenerator::GenerateContentDocument(const QString &path) { Document IndexGenerator::GenerateContentDocument(const QString &path) {
// 构造文本索引的document // 构造文本索引的document
QString content; QString content;
@ -389,7 +385,7 @@ bool IndexGenerator::isIndexdataExist() {
} }
//deprecated
QStringList IndexGenerator::IndexSearch(QString indexText) { QStringList IndexGenerator::IndexSearch(QString indexText) {
QStringList searchResult; QStringList searchResult;
try { try {
@ -455,96 +451,60 @@ QStringList IndexGenerator::IndexSearch(QString indexText) {
return searchResult; return searchResult;
} }
//void IndexGenerator::setSynonym()
//{
// try
// {
// m_database_path->add_synonym("a","A");
// m_database_path->add_synonym("b","B");
// m_database_path->add_synonym("c","C");
// m_database_path->add_synonym("d","D");
// m_database_path->add_synonym("e","A");
// m_database_path->add_synonym("f","F");
// m_database_path->add_synonym("g","G");
// m_database_path->add_synonym("h","H");
// m_database_path->add_synonym("i","I");
// m_database_path->add_synonym("j","J");
// m_database_path->add_synonym("k","K");
// m_database_path->add_synonym("l","L");
// m_database_path->add_synonym("m","M");
// m_database_path->add_synonym("n","N");
// m_database_path->add_synonym("o","O");
// m_database_path->add_synonym("p","P");
// m_database_path->add_synonym("q","Q");
// m_database_path->add_synonym("r","R");
// m_database_path->add_synonym("s","S");
// m_database_path->add_synonym("t","T");
// m_database_path->add_synonym("u","U");
// m_database_path->add_synonym("v","V");
// m_database_path->add_synonym("w","W");
// m_database_path->add_synonym("x","X");
// m_database_path->add_synonym("y","Y");
// m_database_path->add_synonym("z","Z");
// m_database_path->add_synonym("A","a");
// m_database_path->add_synonym("B","b");
// m_database_path->add_synonym("C","c");
// m_database_path->add_synonym("D","d");
// m_database_path->add_synonym("E","e");
// m_database_path->add_synonym("F","f");
// m_database_path->add_synonym("G","g");
// m_database_path->add_synonym("H","h");
// m_database_path->add_synonym("I","i");
// m_database_path->add_synonym("J","j");
// m_database_path->add_synonym("K","k");
// m_database_path->add_synonym("L","a");
// m_database_path->add_synonym("M","m");
// m_database_path->add_synonym("N","n");
// m_database_path->add_synonym("O","o");
// m_database_path->add_synonym("P","p");
// m_database_path->add_synonym("Q","q");
// m_database_path->add_synonym("R","r");
// m_database_path->add_synonym("S","s");
// m_database_path->add_synonym("T","t");
// m_database_path->add_synonym("U","u");
// m_database_path->add_synonym("V","v");
// m_database_path->add_synonym("W","w");
// m_database_path->add_synonym("X","x");
// m_database_path->add_synonym("Y","y");
// m_database_path->add_synonym("Z","z");
// m_database_path->commit();
// }
// catch(const Xapian::Error &e)
// {
// qWarning() <<QString::fromStdString(e.get_description());
// }
//}
bool IndexGenerator::deleteAllIndex(QStringList *pathlist) { bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
QStringList *list = pathlist; QStringList *list = pathlist;
if(list->isEmpty()) if(list->isEmpty())
return true; return true;
for(int i = 0; i < list->size(); i++) { try {
QString doc = list->at(i); for(int i = 0; i < list->size(); i++) {
std::string uniqueterm = FileUtils::makeDocUterm(doc); QString doc = list->at(i);
try { std::string uniqueterm = FileUtils::makeDocUterm(doc);
qDebug() << "--delete start--"; qDebug() << "--delete start--";
m_database_path->delete_document(uniqueterm); m_database_path->delete_document(uniqueterm);
m_database_content->delete_document(uniqueterm); m_database_content->delete_document(uniqueterm);
qDebug() << "delete path" << doc; qDebug() << "delete path" << doc;
qDebug() << "delete md5" << QString::fromStdString(uniqueterm); qDebug() << "delete md5" << QString::fromStdString(uniqueterm);
m_database_path->commit();
m_database_content->commit();
qDebug() << "--delete finish--"; qDebug() << "--delete finish--";
// qDebug()<<"m_database_path->get_lastdocid()!!!"<<m_database_path->get_lastdocid(); // qDebug()<<"m_database_path->get_lastdocid()!!!"<<m_database_path->get_lastdocid();
// qDebug()<<"m_database_path->get_doccount()!!!"<<m_database_path->get_doccount();
// qDebug()<<"m_database_path->get_doccount()!!!"<<m_database_path->get_doccount();
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return false;
} }
m_database_path->commit();
m_database_content->commit();
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return false;
} }
Q_EMIT this->transactionFinished(); Q_EMIT this->transactionFinished();
return true; return true;
} }
// Applies a batch of pending file changes to the index databases.
//
// pendingFiles - batch to process; must not be null. Entries whose
//                shouldRemoveIndex() is true are removed from the index;
//                all others are (re)indexed by name, and additionally by
//                content when their extension is flagged in targetFileTypeMap.
//
// Deletions are applied first, then name indexing, then content indexing.
// Always returns true: the results of deleteAllIndex() / creatAllIndex() are
// not propagated (unchanged from the previous behaviour).
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
{
    // Work lists live on the stack. The previous version allocated them with
    // new and never deleted deleteList, leaking it on every call.
    QQueue<QVector<QString>> fileIndexInfo;
    QQueue<QString> fileContentIndexInfo;
    QStringList deleteList;

    for(PendingFile file : *pendingFiles) {
        const QString path = file.path();
        if(file.shouldRemoveIndex()) {
            deleteList.append(path);
            continue;
        }
        // Last path component, i.e. the file name.
        const QString fileName = path.section("/" , -1);
        fileIndexInfo.append(QVector<QString>() << fileName << path << QString(file.isDir() ? "1" : "0"));
        // QString::split() without SkipEmptyParts always yields at least one
        // element, so last() is safe here; the former isEmpty() guard on the
        // split result was always true and has been dropped.
        if(true == targetFileTypeMap[fileName.split(".").last()])
            fileContentIndexInfo.append(path);
    }

    if(!deleteList.isEmpty()) {
        deleteAllIndex(&deleteList);
    }
    if(!fileIndexInfo.isEmpty()) {
        creatAllIndex(&fileIndexInfo);
    }
    if(!fileContentIndexInfo.isEmpty()) {
        creatAllIndex(&fileContentIndexInfo);
    }
    return true;
}

View File

@ -29,18 +29,22 @@
#include <QMutex> #include <QMutex>
#include <QQueue> #include <QQueue>
//#include <QMetaObject> //#include <QMetaObject>
#include "construct-document.h"
#include "index-status-recorder.h" #include "index-status-recorder.h"
#include "document.h" #include "document.h"
#include "file-reader.h" #include "file-reader.h"
#include "common.h" #include "common.h"
#include "pending-file.h"
namespace Zeeker { namespace Zeeker {
extern QList<Document> *_doc_list_path; //extern QVector<Document> *_doc_list_path;
extern QMutex _mutex_doc_list_path; //extern QMutex _mutex_doc_list_path;
extern QList<Document> *_doc_list_content; //extern QVector<Document> *_doc_list_content;
extern QMutex _mutex_doc_list_content; //extern QMutex _mutex_doc_list_content;
class IndexGenerator : public QObject { class IndexGenerator : public QObject {
friend class ConstructDocumentForPath;
friend class ConstructDocumentForContent;
Q_OBJECT Q_OBJECT
public: public:
static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr); static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr);
@ -58,6 +62,7 @@ public Q_SLOTS:
bool creatAllIndex(QQueue<QVector<QString>> *messageList); bool creatAllIndex(QQueue<QVector<QString>> *messageList);
bool creatAllIndex(QQueue<QString> *messageList); bool creatAllIndex(QQueue<QString> *messageList);
bool deleteAllIndex(QStringList *pathlist); bool deleteAllIndex(QStringList *pathlist);
bool updateIndex(QVector<PendingFile> *pendingFiles);
private: private:
explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr); explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr);
@ -72,8 +77,10 @@ private:
void insertIntoDatabase(Document& doc); void insertIntoDatabase(Document& doc);
void insertIntoContentDatabase(Document& doc); void insertIntoContentDatabase(Document& doc);
// QList<Document> *m_doc_list_path; //for path index static QVector<Document> _doc_list_path;
// QList<Document> *m_doc_list_content; // for text content index static QMutex _mutex_doc_list_path;
static QVector<Document> _doc_list_content;
static QMutex _mutex_doc_list_content;
QMap<QString, QStringList> m_index_map; QMap<QString, QStringList> m_index_map;
QString m_index_data_path; QString m_index_data_path;
Xapian::WritableDatabase* m_database_path; Xapian::WritableDatabase* m_database_path;

View File

@ -13,8 +13,10 @@ IndexStatusRecorder *IndexStatusRecorder::getInstance()
void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value) void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value)
{ {
m_mutex.lock();
m_status->setValue(key, value); m_status->setValue(key, value);
m_status->sync(); m_status->sync();
m_mutex.unlock();
} }
const QVariant IndexStatusRecorder::getStatus(const QString &key) const QVariant IndexStatusRecorder::getStatus(const QString &key)

View File

@ -4,9 +4,11 @@
#include <QObject> #include <QObject>
#include <QSettings> #include <QSettings>
#include <QDir> #include <QDir>
#include <QMutex>
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state" #define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
#define INDEX_DATABASE_STATE "index_database_state" #define INDEX_DATABASE_STATE "index_database_state"
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit" #define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
#define PENDING_FILE_QUEUE_FINISH "pending_file_queue_finish"
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf" #define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
namespace Zeeker { namespace Zeeker {
//fixme: we need a better way to record index status. //fixme: we need a better way to record index status.
@ -21,6 +23,7 @@ public:
private: private:
explicit IndexStatusRecorder(QObject *parent = nullptr); explicit IndexStatusRecorder(QObject *parent = nullptr);
QSettings *m_status; QSettings *m_status;
QMutex m_mutex;
}; };
} }

View File

@ -4,10 +4,14 @@ HEADERS += \
$$PWD/construct-document.h \ $$PWD/construct-document.h \
$$PWD/document.h \ $$PWD/document.h \
$$PWD/file-reader.h \ $$PWD/file-reader.h \
$$PWD/file-search-plugin.h \
$$PWD/first-index.h \ $$PWD/first-index.h \
$$PWD/index-generator.h \ $$PWD/index-generator.h \
$$PWD/index-status-recorder.h \ $$PWD/index-status-recorder.h \
$$PWD/inotify-index.h \ $$PWD/inotify-index.h \
$$PWD/inotify-watch.h \
$$PWD/pending-file-queue.h \
$$PWD/pending-file.h \
$$PWD/search-manager.h \ $$PWD/search-manager.h \
$$PWD/searchmethodmanager.h \ $$PWD/searchmethodmanager.h \
$$PWD/traverse_bfs.h \ $$PWD/traverse_bfs.h \
@ -17,10 +21,14 @@ SOURCES += \
$$PWD/construct-document.cpp \ $$PWD/construct-document.cpp \
$$PWD/document.cpp \ $$PWD/document.cpp \
$$PWD/file-reader.cpp \ $$PWD/file-reader.cpp \
$$PWD/file-search-plugin.cpp \
$$PWD/first-index.cpp \ $$PWD/first-index.cpp \
$$PWD/index-generator.cpp \ $$PWD/index-generator.cpp \
$$PWD/index-status-recorder.cpp \ $$PWD/index-status-recorder.cpp \
$$PWD/inotify-index.cpp \ $$PWD/inotify-index.cpp \
$$PWD/inotify-watch.cpp \
$$PWD/pending-file-queue.cpp \
$$PWD/pending-file.cpp \
$$PWD/search-manager.cpp \ $$PWD/search-manager.cpp \
$$PWD/searchmethodmanager.cpp \ $$PWD/searchmethodmanager.cpp \
$$PWD/traverse_bfs.cpp \ $$PWD/traverse_bfs.cpp \

View File

@ -18,6 +18,7 @@
* *
*/ */
#include "inotify-index.h" #include "inotify-index.h"
#include <QDataStream>
#define CREATE_FILE_NAME_INDEX \ #define CREATE_FILE_NAME_INDEX \
indexQueue->enqueue(QVector<QString>() << QString(event->name) << QString(currentPath[event->wd] + '/' + event->name) << QString((event->mask & IN_ISDIR) ? "1" : "0")); \ indexQueue->enqueue(QVector<QString>() << QString(event->name) << QString(currentPath[event->wd] + '/' + event->name) << QString((event->mask & IN_ISDIR) ? "1" : "0")); \
@ -51,7 +52,7 @@ InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path) {
UkuiSearchQDBus usQDBus; UkuiSearchQDBus usQDBus;
usQDBus.setInotifyMaxUserWatches(); usQDBus.setInotifyMaxUserWatches();
qDebug() << "setInotifyMaxUserWatches end"; qDebug() << "setInotifyMaxUserWatches end";
m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this);
} }
InotifyIndex::~InotifyIndex() { InotifyIndex::~InotifyIndex() {
@ -341,7 +342,24 @@ void InotifyIndex::run() {
qDebug() << "select timeout!"; qDebug() << "select timeout!";
::free(read_timeout); ::free(read_timeout);
IndexGenerator::getInstance()->~IndexGenerator(); IndexGenerator::getInstance()->~IndexGenerator();
// GlobalSettings::getInstance()->forceSync(); QBuffer buffer;
QDataStream out(&buffer);
if (m_sharedMemory->isAttached()) {
m_sharedMemory->detach();
}
buffer.open(QBuffer::ReadWrite);
out << currentPath;
int size = buffer.size();
if (!m_sharedMemory->create(size)) {
qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString();
} else {
m_sharedMemory->lock();
char *to = static_cast<char *>(m_sharedMemory->data());
const char *from = buffer.data().constData();
memcpy(to, from, qMin(size, m_sharedMemory->size()));
m_sharedMemory->unlock();
}
// GlobalSettings::getInstance()->forceSync();
::_exit(0); ::_exit(0);
} else { } else {
memset(buf, 0x00, BUF_LEN); memset(buf, 0x00, BUF_LEN);
@ -373,6 +391,20 @@ void InotifyIndex::run() {
} else if(pid > 0) { } else if(pid > 0) {
memset(buf, 0x00, BUF_LEN); memset(buf, 0x00, BUF_LEN);
waitpid(pid, NULL, 0); waitpid(pid, NULL, 0);
if (!m_sharedMemory->attach()) {
qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString();
} else {
QBuffer buffer;
QDataStream in(&buffer);
QMap<int, QString> pathMap;
m_sharedMemory->lock();
buffer.setData(static_cast<const char *>(m_sharedMemory->constData()), m_sharedMemory->size());
buffer.open(QBuffer::ReadWrite);
in >> pathMap;
m_sharedMemory->unlock();
m_sharedMemory->detach();
currentPath = pathMap;
}
--FileUtils::_index_status; --FileUtils::_index_status;
} else { } else {
assert(false); assert(false);

View File

@ -22,6 +22,7 @@
#include <QThread> #include <QThread>
#include <QTimer> #include <QTimer>
#include <QSharedMemory>
#include <unistd.h> #include <unistd.h>
#include <sys/inotify.h> #include <sys/inotify.h>
#include "index-generator.h" #include "index-generator.h"
@ -77,6 +78,7 @@ private:
std::map<QString, bool>::value_type("et", true), std::map<QString, bool>::value_type("et", true),
std::map<QString, bool>::value_type("pdf", true) std::map<QString, bool>::value_type("pdf", true)
}; };
QSharedMemory *m_sharedMemory = nullptr;
}; };
} }

View File

@ -0,0 +1,459 @@
#include "inotify-watch.h"
#include <sys/ioctl.h>
#include <malloc.h>
#include <errno.h>
using namespace Zeeker;
static InotifyWatch* global_instance_InotifyWatch = nullptr;
/**
 * Returns the shared InotifyWatch instance, constructing it on the first call.
 *
 * @param path Forwarded to the constructor when the instance is created;
 *             ignored on every later call.
 */
Zeeker::InotifyWatch *Zeeker::InotifyWatch::getInstance(const QString &path)
{
    if(global_instance_InotifyWatch) {
        return global_instance_InotifyWatch;
    }
    global_instance_InotifyWatch = new InotifyWatch(path);
    return global_instance_InotifyWatch;
}
/**
 * Builds the watcher rooted at \p path.
 *
 * Calls setInotifyMaxUserWatches() on the search D-Bus helper and creates the
 * QSharedMemory object (key "ukui-search-shared-map") that slotEvent() uses
 * to hand the watch map from the forked child back to the parent.
 */
Zeeker::InotifyWatch::InotifyWatch(const QString &path): Traverse_BFS(path)
{
    qDebug() << "setInotifyMaxUserWatches start";
    UkuiSearchQDBus dbusHelper;
    dbusHelper.setInotifyMaxUserWatches();
    qDebug() << "setInotifyMaxUserWatches end";

    // Parented to this object, so Qt deletes it together with the watcher.
    m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this);
}
/**
 * Releases the socket notifier, if one was ever created.
 */
InotifyWatch::~InotifyWatch()
{
    // delete on a null pointer is a no-op, so no guard is needed.
    delete m_notifier;
    m_notifier = nullptr;
}
/**
 * Registers an inotify watch on \p path for create, delete, modify and
 * move-in/move-out events, and records the returned descriptor in currentPath.
 *
 * @return true when the watch was added, false when inotify_add_watch() failed.
 */
bool InotifyWatch::addWatch(const QString &path)
{
    const uint32_t eventMask = (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY);
    const int watchDescriptor = inotify_add_watch(m_inotifyFd, path.toStdString().c_str(), eventMask);
    if(watchDescriptor != -1) {
        currentPath[watchDescriptor] = path;
        return true;
    }
    qWarning() << "AddWatch error:" << path;
    return false;
}
/**
 * Stops watching \p path and every watched directory recorded beneath it.
 *
 * @param path               Root of the sub-tree whose watches are dropped.
 * @param removeFromDatabase When true, every dropped directory (as selected by
 *                           FileUtils::isOrUnder) is also enqueued as a deleted
 *                           directory so its index entries get removed. When
 *                           false, only the watches and map entries are dropped.
 * @return Always true.
 *
 * Each entry's own watch descriptor (the map key) is handed to
 * inotify_rm_watch(); previously the root's descriptor was looked up and
 * passed again for every entry, so sub-directory watches were never released.
 */
bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
{
    // Resolve the root descriptor once, before the loops can erase its entry.
    const int rootWd = currentPath.key(path, -1);
    if(rootWd != -1) {
        inotify_rm_watch(m_inotifyFd, rootWd);
    }

    if(removeFromDatabase) {
        for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
            if(FileUtils::isOrUnder(i.value(), path)) {
                qDebug() << "remove path: " << i.value();
                // The root's watch was already removed above.
                if(i.key() != rootWd) {
                    inotify_rm_watch(m_inotifyFd, i.key());
                }
                PendingFile f(i.value());
                f.setDeleted();
                f.setIsDir();
                PendingFileQueue::getInstance()->enqueue(f);
                // erase() returns the iterator following the removed entry.
                i = currentPath.erase(i);
            } else {
                ++i;
            }
        }
    } else {
        for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
            // Only entries strictly longer than the root can be below it; the
            // root's own entry is removed after the loop.
            if(i.value().length() > path.length() && FileUtils::isOrUnder(i.value(), path)) {
                inotify_rm_watch(m_inotifyFd, i.key());
                i = currentPath.erase(i);
            } else {
                ++i;
            }
        }
    }

    // Drop the root's entry if a loop above has not already erased it.
    if(rootWd != -1) {
        currentPath.remove(rootWd);
    }
    return true;
}
void InotifyWatch::DoSomething(const QFileInfo &info)
{
qDebug() << info.fileName() << "-------" << info.absoluteFilePath();
if(info.isDir() && (!info.isSymLink())) {
this->addWatch(info.absoluteFilePath());
}
PendingFile f(info.absoluteFilePath());
if(info.isDir()) {
f.setIsDir();
}
PendingFileQueue::getInstance()->enqueue(f);
}
void InotifyWatch::firstTraverse()
{
QQueue<QString> bfs;
bfs.enqueue(this->path);
QFileInfoList list;
QDir dir;
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for(auto i : list) {
if(i.isDir() && (!(i.isSymLink()))) {
this->addWatch(i.absoluteFilePath());
bfs.enqueue(i.absoluteFilePath());
}
}
}
}
/**
 * Currently a no-op: the whole body is commented out.
 * The disabled code below would quit the thread, delete the notifier, drop
 * all watches under the home directory and record INOTIFY_NORMAL_EXIT = "3".
 */
void InotifyWatch::stopWatch()
{
// if(this->isRunning()) {
// this->quit();
// if(m_notifier)
// delete m_notifier;
// m_notifier = nullptr;
// removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
// }
// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
}
/**
 * Thread entry point.
 *
 * 1. Initialises inotify (retrying once after asking the D-Bus helper for more
 *    inotify user instances).
 * 2. Watches the home directory and every real directory below it.
 * 3. Blocks on the fifo at UKUI_SEARCH_PIPE_PATH until something is written,
 *    then unlinks the fifo.
 * 4. While index search is the active method, waits for inotify events and
 *    hands every batch that contains at least one non-hidden entry to
 *    slotEvent().
 * 5. On leaving the loop in direct-search mode, records the exit status and
 *    drops all watches; finally closes the inotify descriptor.
 */
void InotifyWatch::run()
{
    m_inotifyFd = inotify_init();
    if (m_inotifyFd > 0) {
        qDebug()<<"Inotify init success!";
    } else {
        qWarning() << "Inotify init fail! Now try add inotify_user_instances.";
        UkuiSearchQDBus usQDBus;
        usQDBus.addInotifyUserInstances(128);
        m_inotifyFd = inotify_init();
        if (m_inotifyFd > 0) {
            qDebug()<<"Inotify init success!";
        } else {
            printf("errno=%d\n",errno);
            printf("Mesg:%s\n",strerror(errno));
            Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
        }
    }

    this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
    this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
    this->firstTraverse();

    int fifo_fd;
    char buffer[2];
    memset(buffer, 0, sizeof(buffer));
    fifo_fd = open(UKUI_SEARCH_PIPE_PATH, O_RDWR);
    if(fifo_fd == -1) {
        qWarning() << "Open fifo error\n";
        assert(false);
    }
    // Read one byte less than the buffer holds so it stays NUL-terminated for
    // the "%s" debug output below.
    int retval = read(fifo_fd, buffer, sizeof(buffer) - 1);
    if(retval == -1) {
        qWarning() << "read error\n";
        assert(false);
    }
    qDebug("Read fifo[%s]", buffer);
    qDebug("Read data ok");
    close(fifo_fd);
    if(buffer[0] & 0x1) {
        qDebug("Data confirmed\n");
    }
    unlink(UKUI_SEARCH_PIPE_PATH);

    while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(m_inotifyFd, &fds);
        int rc;
        rc = select(m_inotifyFd + 1, &fds, NULL, NULL, NULL);
        if(rc > 0) {
            int avail = 0;
            // ioctl() reports failure by returning -1 (errno holds the cause).
            if (ioctl(m_inotifyFd, FIONREAD, &avail) == -1) {
                qWarning() << "Did not receive an entire inotify event.";
                return;
            }
            char* buf = (char*)malloc(avail);
            memset(buf, 0x00, avail);
            const ssize_t len = read(m_inotifyFd, buf, avail);
            if(len != avail) {
                qWarning()<<"read event error";
                // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
            }
            // Skip leading events for dot-prefixed names; stop at the first
            // entry that is worth processing.
            int i = 0;
            while (i < len) {
                const struct inotify_event* event = (struct inotify_event*)&buf[i];
                if(event->name[0] != '.') {
                    // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
                    // qDebug("mask:0x%x,",event->mask);
                    break;
                }
                i += sizeof(struct inotify_event) + event->len;
            }
            if(i < len ) {
                qDebug() << "fork";
                slotEvent(buf, len);
            }
            // Always release the batch, including when it held only hidden
            // entries and slotEvent() was not called.
            free(buf);
        } else if(rc < 0) {
            // error
            qWarning() << "select result < 0, error!";
            IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
            assert(false);
        }
    }

    qDebug() << "Leave watch loop";
    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
        removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
    }
    close(m_inotifyFd);
    // fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
    // m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
    // connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);
    // exec();
}
/**
 * Processes one batch of raw inotify events in a forked child process.
 *
 * Child: handles the batch in \p buf, then keeps draining the inotify
 * descriptor until select() times out. On timeout it serialises currentPath
 * into the shared-memory segment, finishes the pending-file queue and exits.
 * Parent: waits for the child, then replaces currentPath with the map read
 * back from the shared-memory segment.
 *
 * @param buf Buffer holding the raw inotify events.
 * @param len Number of valid bytes in \p buf.
 *
 * NOTE(review): _index_status is incremented unconditionally but only
 * decremented on the parent branch inside the INDEXSEARCH check - confirm the
 * counter cannot drift when the search method is not INDEXSEARCH.
 */
void InotifyWatch::slotEvent(char *buf, ssize_t len)
{
// eventProcess(socket);
++FileUtils::_index_status;
if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
pid_t pid;
pid = fork();
if(pid == 0) {
// Child process: ask for SIGTERM when the parent dies and rename the process.
prctl(PR_SET_PDEATHSIG, SIGTERM);
prctl(PR_SET_NAME, "inotify-index");
// Handle the batch that triggered the fork.
this->eventProcess(buf, len);
fd_set read_fds;
int rc;
timeval* read_timeout = (timeval*)malloc(sizeof(timeval));
read_timeout->tv_sec = 40;
read_timeout->tv_usec = 0;
// NOTE(review): read_timeout is not reset inside the loop; on Linux select()
// may rewrite it with the time remaining, which would make the 40 s a total
// budget rather than a per-wait idle timeout - confirm which is intended.
for(;;) {
FD_ZERO(&read_fds);
FD_SET(m_inotifyFd, &read_fds);
rc = select(m_inotifyFd + 1, &read_fds, NULL, NULL, read_timeout);
if(rc < 0) {
// select() failed: record the abnormal state.
qWarning() << "fork select result < 0, error!";
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
assert(false);
} else if(rc == 0) {
// Timed out with no further events: publish the watch map and exit.
qDebug() << "select timeout!";
::free(read_timeout);
QBuffer buffer;
QDataStream out(&buffer);
if (m_sharedMemory->isAttached()) {
m_sharedMemory->detach();
}
buffer.open(QBuffer::ReadWrite);
// Serialise the descriptor -> path map so the parent can read it back.
out << currentPath;
int size = buffer.size();
if (!m_sharedMemory->create(size)) {
qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString();
} else {
m_sharedMemory->lock();
char *to = static_cast<char *>(m_sharedMemory->data());
const char *from = buffer.data().constData();
// Never copy more than the segment can hold.
memcpy(to, from, qMin(size, m_sharedMemory->size()));
m_sharedMemory->unlock();
}
// GlobalSettings::getInstance()->forceSync();
// Stop the queue's worker thread, then run the queue's destructor explicitly
// because _exit() below does not run destructors.
PendingFileQueue::getInstance()->forceFinish();
PendingFileQueue::getInstance()->~PendingFileQueue();
::_exit(0);
} else {
// More events arrived before the timeout: process them in this child.
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
this->eventProcess(m_inotifyFd);
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
}
}
} else if(pid > 0) {
// Parent process: block until the child has exited.
waitpid(pid, NULL, 0);
if (!m_sharedMemory->attach()) {
qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString();
} else {
QBuffer buffer;
QDataStream in(&buffer);
QMap<int, QString> pathMap;
m_sharedMemory->lock();
buffer.setData(static_cast<const char *>(m_sharedMemory->constData()), m_sharedMemory->size());
buffer.open(QBuffer::ReadWrite);
in >> pathMap;
m_sharedMemory->unlock();
m_sharedMemory->detach();
// Adopt the watch map as the child left it.
currentPath = pathMap;
}
--FileUtils::_index_status;
} else {
// fork() failed.
assert(false);
}
}
}
/**
 * Reads the inotify events currently queued on m_inotifyFd and reports
 * whether any of them is worth processing.
 *
 * @return The malloc()-allocated buffer holding the raw events when at least
 *         one event refers to a name that does not start with '.'; the caller
 *         owns it and must free() it. NULL when the queue size could not be
 *         queried or every event refers to a dot-prefixed name (the buffer is
 *         freed here in that case).
 *
 * The scan uses the same "skip dot-prefixed names" rule as run() and
 * eventProcess(int). The earlier loop tested for the opposite condition and
 * never advanced past an event that failed the test.
 */
char * InotifyWatch::filter()
{
    int avail = 0;
    // ioctl() reports failure by returning -1 (errno holds the cause).
    if (ioctl(m_inotifyFd, FIONREAD, &avail) == -1) {
        qWarning() << "Did not receive an entire inotify event.";
        return NULL;
    }

    char* buffer = (char*)malloc(avail);
    memset(buffer, 0x00, avail);

    const int len = read(m_inotifyFd, buffer, avail);
    if(len != avail) {
        qWarning()<<"read event error";
        // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
    }

    int i = 0;
    while (i < len) {
        const struct inotify_event* event = (struct inotify_event*)&buffer[i];
        if(event->name[0] != '.') {
            // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
            // qDebug("mask:0x%x,",event->mask);
            return buffer;
        }
        // Always step to the next event so the loop terminates.
        i += sizeof(struct inotify_event) + event->len;
    }

    free(buffer);
    return NULL;
}
/**
 * Reads every inotify event currently queued on \p socket and, when at least
 * one of them refers to a name that does not start with '.', passes the whole
 * batch to eventProcess(const char *, ssize_t).
 *
 * @param socket The inotify file descriptor to read from.
 */
void InotifyWatch::eventProcess(int socket)
{
    // qDebug()<< "Enter eventProcess!";
    int avail = 0;
    // ioctl() reports failure by returning -1 (errno holds the cause).
    if (ioctl(socket, FIONREAD, &avail) == -1) {
        qWarning() << "Did not receive an entire inotify event.";
        return;
    }

    char* buffer = (char*)malloc(avail);
    memset(buffer, 0x00, avail);

    const ssize_t len = read(socket, buffer, avail);
    if(len != avail) {
        qWarning()<<"read event error";
        // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
    }

    // Skip leading events for dot-prefixed names.
    int i = 0;
    while (i < len) {
        const struct inotify_event* event = (struct inotify_event*)&buffer[i];
        if(event->name[0] != '.') {
            // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
            // qDebug("mask:0x%x,",event->mask);
            break;
        }
        i += sizeof(struct inotify_event) + event->len;
    }

    if(i >= len) {
        qDebug() << "There is nothing to do!";
    } else {
        eventProcess(buffer, len);
    }
    // Release the batch on every path, including the "nothing to do" one.
    free(buffer);
}
void InotifyWatch::eventProcess(const char *buffer, ssize_t len)
{
// qDebug()<< "Begin eventProcess! len:" << len;
char * p = const_cast<char*>(buffer);
while (p < buffer + len) {
const struct inotify_event* event = reinterpret_cast<inotify_event *>(p);
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
// qDebug("mask:0x%x,",event->mask);
if(event->name[0] != '.') {
QString path = currentPath[event->wd] + '/' + event->name;
//Create top dir first, traverse it last.
if(event->mask & IN_CREATE) {
// qDebug() << "IN_CREATE";
PendingFile f(path);
if(event->mask & IN_ISDIR) {
f.setIsDir();
}
PendingFileQueue::getInstance(this)->enqueue(f);
if(event->mask & IN_ISDIR) {
if(!QFileInfo(path).isSymLink()){
addWatch(path);
setPath(path);
Traverse();
}
}
goto next;
}
if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) {
qDebug() << "IN_DELETE or IN_MOVED_FROM";
if(event->mask & IN_ISDIR) {
removeWatch(path);
} else {
PendingFile f(path);
f.setDeleted();
PendingFileQueue::getInstance()->enqueue(f);
}
p += sizeof(struct inotify_event) + event->len;
continue;
}
if(event->mask & IN_MODIFY) {
// qDebug() << "IN_MODIFY";
if(!(event->mask & IN_ISDIR)) {
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
}
goto next;
}
if(event->mask & IN_MOVED_TO) {
qDebug() << "IN_MOVED_TO";
if(event->mask & IN_ISDIR) {
removeWatch(path);
PendingFile f(path);
f.setIsDir();
PendingFileQueue::getInstance()->enqueue(f);
if(!QFileInfo(path).isSymLink()){
addWatch(path);
setPath(path);
Traverse();
}
} else {
//Enqueue a deleted file to merge.
PendingFile f(path);
f.setDeleted();
PendingFileQueue::getInstance()->enqueue(f);
//Enqueue a new one.
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
}
goto next;
}
}
next:
p += sizeof(struct inotify_event) + event->len;
}
// qDebug()<< "Finish eventProcess!";
}

View File

@ -0,0 +1,52 @@
#ifndef INOTIFYWATCH_H
#define INOTIFYWATCH_H
#include <QThread>
#include <unistd.h>
#include <sys/inotify.h>
#include <QSocketNotifier>
#include <QDataStream>
#include <QSharedMemory>
#include "traverse_bfs.h"
#include "ukui-search-qdbus.h"
#include "index-status-recorder.h"
#include "file-utils.h"
#include "first-index.h"
#include "pending-file-queue.h"
#include "common.h"
namespace Zeeker {
/**
 * Thread that watches a directory tree with inotify and feeds the resulting
 * file changes into PendingFileQueue.
 *
 * Obtain the shared instance through getInstance(); the constructor and
 * destructor are private.
 */
class InotifyWatch : public QThread, public Traverse_BFS
{
    Q_OBJECT
public:
    /** Returns the shared instance, creating it with \p path on first use. */
    static InotifyWatch* getInstance(const QString& path);

    /** Adds an inotify watch on \p path; returns false if the kernel call fails. */
    bool addWatch(const QString &path);
    /**
     * Drops the watches on \p path and below. When \p removeFromDatabase is
     * true the dropped directories are also enqueued for index removal.
     */
    bool removeWatch(const QString &path, bool removeFromDatabase = true);
    /** Traversal callback: watches directories and enqueues every entry. */
    virtual void DoSomething(const QFileInfo &info) final;
    /** Adds watches on every non-symlink directory below the current path. */
    void firstTraverse();
    void stopWatch();
protected:
    void run() override;

private Q_SLOTS:
    void slotEvent(char *buf, ssize_t len);
private:
    explicit InotifyWatch(const QString& path);
    ~InotifyWatch();
    char * filter();
    void eventProcess(int socket);
    void eventProcess(const char *buffer, ssize_t len);

    // -1 until run() has called inotify_init(), so addWatch()/removeWatch()
    // never act on an indeterminate descriptor.
    int m_inotifyFd = -1;
    QSocketNotifier* m_notifier = nullptr;
    // Used to pass currentPath from the forked child back to the parent.
    QSharedMemory *m_sharedMemory = nullptr;
    // Maps inotify watch descriptor -> watched directory path.
    QMap<int, QString> currentPath;
    QMutex m_mutex;
};
}
#endif // INOTIFYWATCH_H

View File

@ -0,0 +1,166 @@
/*
* Copyright (C) 2021, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include "pending-file-queue.h"
#include "file-utils.h"
#include <malloc.h>
using namespace Zeeker;
static PendingFileQueue *global_instance_pending_file_queue = nullptr;
/**
 * Starts the worker thread and sets up the two single-shot timers that
 * trigger processCache(): a 10 s cache timer and a 500 ms minimum-process
 * timer. Both timers are moved to this thread and are started/stopped through
 * the cacheTimerStart / minProcessTimerStart / timerStop signals.
 */
PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent)
{
// The thread is started before the timers are created and moved into it.
this->start();
m_cacheTimer = new QTimer;
m_minProcessTimer = new QTimer;
m_cacheTimer->setInterval(10*1000);
m_cacheTimer->setSingleShot(true);
m_minProcessTimer->setInterval(500);
m_minProcessTimer->setSingleShot(true);
m_cacheTimer->moveToThread(this);
m_minProcessTimer->moveToThread(this);
// connect(this, &PendingFileQueue::cacheTimerStart, m_cacheTimer, f, Qt::DirectConnection);
// connect(this, &PendingFileQueue::minProcessTimerStart, m_minProcessTimer, f,Qt::DirectConnection);
// Timers are driven through signals rather than by calling start()/stop() directly.
connect(this, SIGNAL(cacheTimerStart()), m_cacheTimer, SLOT(start()));
connect(this, SIGNAL(minProcessTimerStart()), m_minProcessTimer, SLOT(start()));
connect(this, &PendingFileQueue::timerStop, m_cacheTimer, &QTimer::stop);
connect(this, &PendingFileQueue::timerStop, m_minProcessTimer, &QTimer::stop);
// Direct connections: processCache() is invoked in the thread that emits timeout().
connect(m_cacheTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection);
connect(m_minProcessTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection);
}
/**
 * Returns the shared PendingFileQueue, constructing it on the first call.
 *
 * @param parent QObject parent used only when the instance is created;
 *               ignored on later calls.
 */
PendingFileQueue *PendingFileQueue::getInstance(QObject *parent)
{
    if (global_instance_pending_file_queue) {
        return global_instance_pending_file_queue;
    }
    global_instance_pending_file_queue = new PendingFileQueue(parent);
    return global_instance_pending_file_queue;
}
/**
 * Deletes both timers and explicitly runs the IndexGenerator destructor.
 */
PendingFileQueue::~PendingFileQueue()
{
    // delete on a null pointer is a no-op, so the timers need no guard.
    delete m_cacheTimer;
    m_cacheTimer = nullptr;

    delete m_minProcessTimer;
    m_minProcessTimer = nullptr;

    IndexGenerator::getInstance()->~IndexGenerator();
}
/**
 * Shuts the worker thread down: sleeps 600 ms, emits timerStop(), asks the
 * thread's event loop to quit and blocks until the thread has finished.
 */
void PendingFileQueue::forceFinish()
{
// presumably long enough for the 500 ms minimum-process timer to fire once more - TODO confirm
QThread::msleep(600);
Q_EMIT timerStop();
this->quit();
this->wait();
}
/**
 * Adds \p file to the cache of pending changes, merging it with an existing
 * entry for the same path, and (re)arms the processing timers.
 *
 * When \p file is a deleted directory, cached entries at or below it that are
 * not themselves deletions are dropped first. Deletions are kept because the
 * database removes index entries one by one.
 */
void PendingFileQueue::enqueue(const PendingFile &file)
{
    // qDebug() << "enqueuq file: " << file.path();
    m_mutex.lock();
    m_enqueuetimes++;

    // First entry after an empty cache: record that work is outstanding.
    if(m_cache.isEmpty()) {
        IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");
    }

    const bool isRemoval = file.shouldRemoveIndex();

    if(isRemoval && file.isDir()) {
        const auto staysInCache = [&file](const PendingFile &candidate) {
            if(!FileUtils::isOrUnder(candidate.path(), file.path())) {
                return true;
            }
            return candidate.shouldRemoveIndex();
        };
        const auto cacheEnd = m_cache.end();
        const auto firstDropped = std::stable_partition(m_cache.begin(), cacheEnd, staysInCache);
        m_cache.erase(firstDropped, cacheEnd);
    }

    if(isRemoval) {
        m_cache.removeOne(file);
    }

    const int existing = m_cache.indexOf(file);
    if(existing != -1) {
        // qDebug() << "merge file" << file.path() << file.shouldRemoveIndex();
        m_cache[existing].merge(file);
    } else {
        // qDebug() << "insert file" << file.path() << file.shouldRemoveIndex();
        m_cache.append(file);
    }

    // The cache timer is only armed when idle; the minimum-process timer is
    // restarted on every enqueue.
    if(!m_cacheTimer->isActive()) {
        Q_EMIT cacheTimerStart();
    }
    Q_EMIT minProcessTimerStart();

    m_mutex.unlock();
}
/**
 * Thread entry point: runs this thread's event loop until quit() is called.
 */
void PendingFileQueue::run()
{
exec();
}
void PendingFileQueue::processCache()
{
qDebug()<< "Begin processCache!" ;
m_mutex.lock();
qDebug() << "Events:" << m_enqueuetimes;
m_enqueuetimes = 0;
m_cache.swap(m_pendingFiles);
// m_pendingFiles = m_cache;
// m_cache.clear();
// m_cache.squeeze();
m_mutex.unlock();
qDebug() << "Current process-------------";
for(PendingFile i : m_pendingFiles) {
qDebug() << "|" << i.path();
qDebug() << "|" <<i.shouldRemoveIndex();
}
qDebug() << "Current process-------------";
if(m_pendingFiles.isEmpty()) {
qDebug()<< "Empty, finish processCache!";
return;
}
IndexGenerator::getInstance()->updateIndex(&m_pendingFiles);
m_mutex.lock();
if(m_cache.isEmpty()) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
}
m_mutex.unlock();
m_pendingFiles.clear();
m_pendingFiles.squeeze();
malloc_trim(0);
qDebug()<< "Finish processCache!";
return;
}

View File

@ -0,0 +1,67 @@
/*
* Copyright (C) 2021, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef PENDINGFILEQUEUE_H
#define PENDINGFILEQUEUE_H
#include <QObject>
#include <QVector>
#include <QTimer>
#include <QThread>
#include <QMutex>
#include "pending-file.h"
#include "index-generator.h"
namespace Zeeker {
/**
 * Thread that collects PendingFile entries and periodically passes them to
 * the index generator. Entries accumulate in m_cache and are processed in
 * batches when one of the two timers fires.
 */
class PendingFileQueue : public QThread
{
Q_OBJECT
public:
// Returns the shared instance, creating it with \p parent on first use.
static PendingFileQueue *getInstance(QObject *parent = nullptr);
~PendingFileQueue();
//This method will block until current cache has been processed.
//Do not do enqueue operation in other thread while this method is running.
void forceFinish();
// Adds or merges \p file into the cache and (re)starts the timers.
void enqueue(const PendingFile& file);
// Single-shot timer (10 s) started when it is not already running.
QTimer *m_cacheTimer = nullptr;
// Single-shot timer (500 ms) restarted on every enqueue.
QTimer *m_minProcessTimer = nullptr;
protected:
void run() override;
Q_SIGNALS:
// Emitted to start m_cacheTimer.
void cacheTimerStart();
// Emitted to (re)start m_minProcessTimer.
void minProcessTimerStart();
// Emitted to stop both timers.
void timerStop();
private:
// Timer callback that indexes the cached entries.
void processCache();
explicit PendingFileQueue(QObject *parent = nullptr);
// Entries waiting for the next processing round; guarded by m_mutex.
QVector<PendingFile> m_cache;
// The batch currently being processed by processCache().
QVector<PendingFile> m_pendingFiles;
QMutex m_mutex;
// NOTE(review): m_timeoutMutex, m_timerThread and m_timeout do not appear to
// be used by the implementation file - confirm whether they can be removed.
QMutex m_timeoutMutex;
QThread *m_timerThread = nullptr;
bool m_timeout = false;
// Number of enqueue() calls since the last processCache() run.
int m_enqueuetimes = 0;
};
}
#endif // PENDINGFILEQUEUE_H

View File

@ -0,0 +1,81 @@
/*
* Copyright (C) 2021, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include "pending-file.h"
using namespace Zeeker;
/**
 * Creates an entry for \p path with the deleted, modified and is-dir flags
 * all cleared. The path is stored exactly as given.
 */
PendingFile::PendingFile(const QString &path)
: m_path(path)
, m_deleted(false)
, m_modified(false)
, m_isDir(false)
{
}
/** Returns the path this entry refers to. */
QString PendingFile::path() const
{
return m_path;
}
void PendingFile::setPath(const QString& path)
{
if (path.endsWith(QLatin1Char('/'))) {
m_path = path.mid(0, m_path.length() - 1);
return;
}
m_path = path;
}
//bool PendingFile::isNewFile() const
//{
// return m_created;
//}
//bool PendingFile::shouldIndexContents() const
//{
// if (m_created || m_modified) {
// return true;
// }
// return false;
//}
/** Returns true when this entry has been flagged as a directory. */
bool PendingFile::isDir() const
{
return m_isDir;
}
/** Returns true when this entry has been flagged as deleted. */
bool PendingFile::shouldRemoveIndex() const
{
return m_deleted;
}
/**
 * Replaces this entry's modified and deleted flags with those of \p file.
 * The flags are overwritten, not OR-ed; the path and the is-dir flag are
 * left untouched.
 */
void PendingFile::merge(const PendingFile& file)
{
// m_created |= file.m_created;
m_modified = file.m_modified;
m_deleted = file.m_deleted;
}
/** Writes the deleted, modified and is-dir flags to the debug log. */
void PendingFile::printFlags() const
{
// qDebug() << "Created:" << m_created;
qDebug() << "Deleted:" << m_deleted;
qDebug() << "Modified:" << m_modified;
qDebug() << "Is dir:" << m_isDir;
}

View File

@ -0,0 +1,65 @@
/*
* Copyright (C) 2021, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef PENDINGFILE_H
#define PENDINGFILE_H
#include <QString>
#include <QDebug>
namespace Zeeker {
/**
 * Represents a file/folder which needs to be indexed.
 * Carries the path plus three one-bit flags (deleted, modified, is-dir).
 */
class PendingFile
{
public:
explicit PendingFile(const QString& path = QString());
// Returns the stored path.
QString path() const;
// Replaces the stored path.
void setPath(const QString& path);
// Marks this entry as a directory.
void setIsDir(){ m_isDir = true; }
// Marks this entry as modified.
void setModified() { m_modified = true; }
// void setCreated() { m_created = true; }
// Marks this entry as deleted.
void setDeleted() { m_deleted = true; }
// True when the entry is flagged as deleted.
bool shouldRemoveIndex() const;
// bool shouldIndexContents() const;
// True when the entry is flagged as a directory.
bool isDir() const;
// Two entries are equal when their paths are equal; the flags are ignored.
bool operator == (const PendingFile& rhs) const {
return (m_path == rhs.m_path);
}
/**
* Takes a PendingFile \p file and merges its flags into
* the current PendingFile
*/
void merge(const PendingFile& file);
private:
QString m_path;
// bool m_created : 1;
// One-bit flags; all three are cleared by the constructor.
bool m_deleted : 1;
bool m_modified : 1;
bool m_isDir : 1;
// Debug helper that logs the three flags.
void printFlags() const;
};
}
#endif // PENDINGFILE_H

View File

@ -27,7 +27,7 @@ QMutex SearchManager::m_mutex1;
QMutex SearchManager::m_mutex2; QMutex SearchManager::m_mutex2;
QMutex SearchManager::m_mutex3; QMutex SearchManager::m_mutex3;
SearchManager::SearchManager(QObject *parent) : QObject(parent) { SearchManager::SearchManager(QObject *parent) : QObject(parent) {
m_pool.setMaxThreadCount(2); m_pool.setMaxThreadCount(3);
m_pool.setExpiryTimeout(1000); m_pool.setExpiryTimeout(1000);
} }
@ -56,39 +56,54 @@ void SearchManager::onKeywordSearch(QString keyword, QQueue<QString> *searchResu
++uniqueSymbol3; ++uniqueSymbol3;
m_mutex3.unlock(); m_mutex3.unlock();
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { // if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
DirectSearch *directSearch; // DirectSearch *directSearch;
directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1); // directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1);
m_pool.start(directSearch); // m_pool.start(directSearch);
} else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { // } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
FileSearch *filesearch; // FileSearch *filesearch;
filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5); // filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5);
m_pool.start(filesearch); // m_pool.start(filesearch);
FileSearch *dirsearch; // FileSearch *dirsearch;
dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5); // dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5);
m_pool.start(dirsearch); // m_pool.start(dirsearch);
FileContentSearch *contentSearch; // FileContentSearch *contentSearch;
contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5); // contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5);
m_pool.start(contentSearch); // m_pool.start(contentSearch);
} else { // } else {
qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast<int>(FileUtils::searchMethod); // qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast<int>(FileUtils::searchMethod);
} // }
return; return;
} }
bool SearchManager::isBlocked(QString &path) { bool SearchManager::isBlocked(QString &path) {
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
for(QString i : blockList) { for(QString i : blockList) {
if(path.startsWith(i.prepend("/"))) if(FileUtils::isOrUnder(path, i))
return true; return true;
} }
return false; return false;
} }
FileSearch::FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) { bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path)
{
QFileInfo info(path);
if(!info.exists()) {
return false;
}
ri.icon = FileUtils::getFileIcon(QUrl::fromLocalFile(path).toString());
ri.name = info.fileName();
ri.description = QVector<SearchPluginIface::DescriptionInfo>() \
<< SearchPluginIface::DescriptionInfo{tr("Path:"), path} \
<< SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
ri.actionKey = path;
ri.type = 0;
return true;
}
FileSearch::FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
this->setAutoDelete(true); this->setAutoDelete(true);
m_search_result = searchResult; m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol; m_uniqueSymbol = uniqueSymbol;
@ -178,18 +193,13 @@ int FileSearch::getResult(Xapian::MSet &result) {
if(SearchManager::isBlocked(path)) { if(SearchManager::isBlocked(path)) {
continue; continue;
} }
SearchPluginIface::ResultInfo ri;
QFileInfo info(path); if(SearchManager::creatResultInfo(ri, path)) {
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << path << "is not exist!!";
} else {
switch(m_value.toInt()) { switch(m_value.toInt()) {
case 1: case 1:
SearchManager::m_mutex1.lock(); SearchManager::m_mutex1.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol2) { if(m_uniqueSymbol == SearchManager::uniqueSymbol2) {
m_search_result->enqueue(path); m_search_result->enqueue(ri);
SearchManager::m_mutex1.unlock(); SearchManager::m_mutex1.unlock();
} else { } else {
SearchManager::m_mutex1.unlock(); SearchManager::m_mutex1.unlock();
@ -200,7 +210,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
case 0: case 0:
SearchManager::m_mutex2.lock(); SearchManager::m_mutex2.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) { if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
m_search_result->enqueue(path); m_search_result->enqueue(ri);
SearchManager::m_mutex2.unlock(); SearchManager::m_mutex2.unlock();
} else { } else {
SearchManager::m_mutex2.unlock(); SearchManager::m_mutex2.unlock();
@ -210,8 +220,8 @@ int FileSearch::getResult(Xapian::MSet &result) {
default: default:
break; break;
} }
// searchResult.append(path);
} }
// searchResult.append(path);
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent; qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
} }
// if(!pathTobeDelete->isEmpty()) // if(!pathTobeDelete->isEmpty())
@ -219,7 +229,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
return 0; return 0;
} }
FileContentSearch::FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) { FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) {
this->setAutoDelete(true); this->setAutoDelete(true);
m_search_result = searchResult; m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol; m_uniqueSymbol = uniqueSymbol;
@ -280,29 +290,15 @@ int FileContentSearch::keywordSearchContent() {
words.append(sKeyWord.at(i).word).append(" "); words.append(sKeyWord.at(i).word).append(" ");
} }
Xapian::Query query = qp.parse_query(words);
// Xapian::Query query = qp.parse_query(keyword.toStdString());
// QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
// //Creat a query
// std::string words;
// for(int i=0;i<sKeyWord.size();i++)
// {
// words.append(sKeyWord.at(i).word).append(" ");
// }
// Xapian::Query query = qp.parse_query(words); // Xapian::Query query = qp.parse_query(words);
// std::vector<Xapian::Query> v; std::vector<Xapian::Query> v;
// for(int i=0;i<sKeyWord.size();i++) for(int i=0; i<sKeyWord.size(); i++) {
// { v.push_back(Xapian::Query(sKeyWord.at(i).word));
// v.push_back(Xapian::Query(sKeyWord.at(i).word)); qDebug() << QString::fromStdString(sKeyWord.at(i).word);
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word); }
// } Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description()); qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
enquire.set_query(query); enquire.set_query(query);
@ -339,15 +335,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
continue; continue;
} }
QFileInfo info(path); SearchPluginIface::ResultInfo ri;
if(!SearchManager::creatResultInfo(ri, path)) {
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << path << "is not exist!!";
continue; continue;
} }
// Construct snippets containing keyword. // Construct snippets containing keyword.
QStringList snippets; // QStringList snippets;
// snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400))); // snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400)));
// qWarning()<<QString::fromStdString(s); // qWarning()<<QString::fromStdString(s);
auto term = doc.termlist_begin(); auto term = doc.termlist_begin();
@ -363,13 +356,16 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
} else { } else {
snippet.append("...").prepend("..."); snippet.append("...").prepend("...");
} }
snippets.append(snippet); ri.description.prepend(SearchPluginIface::DescriptionInfo{"",snippet});
// snippets.append(snippet);
QString().swap(snippet); QString().swap(snippet);
std::string().swap(s); std::string().swap(s);
++count; ++count;
} }
std::string().swap(data); std::string().swap(data);
// for(QString i : QString::fromStdString(keyWord).split(" ",QString::SkipEmptyParts)) // for(QString i : QString::fromStdString(keyWord).split(" ",QString::SkipEmptyParts))
// { // {
// std::string word = i.toStdString(); // std::string word = i.toStdString();
@ -387,10 +383,10 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
SearchManager::m_mutex3.lock(); SearchManager::m_mutex3.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol3) { if(m_uniqueSymbol == SearchManager::uniqueSymbol3) {
m_search_result->enqueue(qMakePair(path, snippets)); m_search_result->enqueue(ri);
SearchManager::m_mutex3.unlock(); SearchManager::m_mutex3.unlock();
snippets.clear(); // snippets.clear();
QStringList().swap(snippets); // QStringList().swap(snippets);
} else { } else {
SearchManager::m_mutex3.unlock(); SearchManager::m_mutex3.unlock();
return -1; return -1;
@ -403,12 +399,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
return 0; return 0;
} }
DirectSearch::DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol) { DirectSearch::DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol) {
this->setAutoDelete(true); this->setAutoDelete(true);
m_keyword = keyword; m_keyword = keyword;
m_searchResultFile = searchResultFile; m_searchResult = searchResult;
m_searchResultDir = searchResultDir;
m_uniqueSymbol = uniqueSymbol; m_uniqueSymbol = uniqueSymbol;
m_value = value;
} }
void DirectSearch::run() { void DirectSearch::run() {
@ -417,8 +413,13 @@ void DirectSearch::run() {
QFileInfoList list; QFileInfoList list;
QDir dir; QDir dir;
// QDir::Hidden // QDir::Hidden
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); if(m_value == DIR_SEARCH_VALUE) {
dir.setSorting(QDir::DirsFirst); dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot);
} else {
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
}
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
while(!bfs.empty()) { while(!bfs.empty()) {
dir.setPath(bfs.dequeue()); dir.setPath(bfs.dequeue());
@ -428,7 +429,7 @@ void DirectSearch::run() {
bool findIndex = false; bool findIndex = false;
for (QString j : blockList) { for (QString j : blockList) {
if (i.absoluteFilePath().startsWith(j.prepend("/"))) { if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) {
findIndex = true; findIndex = true;
break; break;
} }
@ -442,26 +443,22 @@ void DirectSearch::run() {
bfs.enqueue(i.absoluteFilePath()); bfs.enqueue(i.absoluteFilePath());
} }
if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) { if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) {
SearchManager::m_mutex1.lock();
// qWarning() << i.fileName() << m_keyword; // qWarning() << i.fileName() << m_keyword;
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) { if(m_searchResult->length() > 49)
// TODO
if(i.isDir() && m_searchResultDir->length() < 51) {
m_searchResultDir->enqueue(i.absoluteFilePath());
} else if(m_searchResultFile->length() < 51) {
m_searchResultFile->enqueue(i.absoluteFilePath());
}
SearchManager::m_mutex1.unlock();
if(m_searchResultDir->length() > 49 && m_searchResultFile->length() > 49) {
return;
}
} else {
// TODO
// More suitable method?
m_searchResultFile->clear();
m_searchResultDir->clear();
SearchManager::m_mutex1.unlock();
return; return;
if((i.isDir() && m_value == DIR_SEARCH_VALUE) || (i.isFile() && m_value == FILE_SEARCH_VALUE)) {
SearchPluginIface::ResultInfo ri;
if(SearchManager::creatResultInfo(ri,i.absoluteFilePath())) {
SearchManager::m_mutex1.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
m_searchResult->enqueue(ri);
SearchManager::m_mutex1.unlock();
} else {
SearchManager::m_mutex1.unlock();
return;
}
}
} }
} }
} }

View File

@ -37,19 +37,19 @@
#include <QThread> #include <QThread>
#include <QUrl> #include <QUrl>
#include "search-plugin-iface.h"
#include "file-utils.h" #include "file-utils.h"
#include "global-settings.h" #include "global-settings.h"
#include "chinese-segmentation.h" #include "chinese-segmentation.h"
#include "common.h"
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
namespace Zeeker { namespace Zeeker {
class LIBSEARCH_EXPORT SearchManager : public QObject { class LIBSEARCH_EXPORT SearchManager : public QObject {
friend class FileSearch; friend class FileSearch;
friend class FileContentSearch; friend class FileContentSearch;
friend class DirectSearch;
Q_OBJECT Q_OBJECT
public: public:
explicit SearchManager(QObject *parent = nullptr); explicit SearchManager(QObject *parent = nullptr);
@ -72,33 +72,15 @@ Q_SIGNALS:
void resultDir(QQueue<QString> *); void resultDir(QQueue<QString> *);
void resultContent(QQueue<QPair<QString, QStringList>> *); void resultContent(QQueue<QPair<QString, QStringList>> *);
private: private:
// int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
// int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
/**
* @brief SearchManager::creatQueryForFileSearch
* This part shall be optimized frequently to provide a more stable search function.
* @param keyword
* @param db
* @return Xapian::Query
*/
// Xapian::Query creatQueryForFileSearch(QString keyword, Xapian::Database &db);
// Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db);
// int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value);
// int getContentResult(size_t uniqueSymbol, Xapian::MSet &result,std::string &keyWord);
static bool isBlocked(QString &path); static bool isBlocked(QString &path);
static bool creatResultInfo(Zeeker::SearchPluginIface::ResultInfo &ri, QString path);
// QQueue<QString> *m_search_result_file = nullptr;
// QQueue<QString> *m_search_result_dir = nullptr;
// QQueue<QPair<QString,QStringList>> *m_search_result_content = nullptr;
QThreadPool m_pool; QThreadPool m_pool;
}; };
class FileSearch : public QRunnable { class FileSearch : public QRunnable {
public: public:
explicit FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20); explicit FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
~FileSearch(); ~FileSearch();
protected: protected:
void run(); void run();
@ -107,7 +89,7 @@ private:
Xapian::Query creatQueryForFileSearch(Xapian::Database &db); Xapian::Query creatQueryForFileSearch(Xapian::Database &db);
int getResult(Xapian::MSet &result); int getResult(Xapian::MSet &result);
QQueue<QString> *m_search_result = nullptr; DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
QString m_value; QString m_value;
unsigned m_slot = 1; unsigned m_slot = 1;
size_t m_uniqueSymbol; size_t m_uniqueSymbol;
@ -118,7 +100,7 @@ private:
class FileContentSearch : public QRunnable { class FileContentSearch : public QRunnable {
public: public:
explicit FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20); explicit FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
~FileContentSearch(); ~FileContentSearch();
protected: protected:
void run(); void run();
@ -126,7 +108,7 @@ private:
int keywordSearchContent(); int keywordSearchContent();
int getResult(Xapian::MSet &result, std::string &keyWord); int getResult(Xapian::MSet &result, std::string &keyWord);
QQueue<QPair<QString, QStringList>> *m_search_result = nullptr; DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
size_t m_uniqueSymbol; size_t m_uniqueSymbol;
QString m_keyword; QString m_keyword;
int m_begin = 0; int m_begin = 0;
@ -135,14 +117,14 @@ private:
class DirectSearch : public QRunnable { class DirectSearch : public QRunnable {
public: public:
explicit DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol); explicit DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol);
protected: protected:
void run(); void run();
private: private:
QString m_keyword; QString m_keyword;
QQueue<QString>* m_searchResultFile = nullptr; DataQueue<SearchPluginIface::ResultInfo>* m_searchResult = nullptr;
QQueue<QString>* m_searchResultDir = nullptr;
size_t m_uniqueSymbol; size_t m_uniqueSymbol;
QString m_value;
}; };
} }

View File

@ -1,25 +1,49 @@
#include "searchmethodmanager.h" #include "searchmethodmanager.h"
using namespace Zeeker; using namespace Zeeker;
/**
 * @brief Constructs the manager and stores the InotifyWatch object that
 *        InotifyWatch::getInstance() returns for HOME_PATH in m_iw.
 */
SearchMethodManager::SearchMethodManager()
    : m_iw(InotifyWatch::getInstance(HOME_PATH))
{
}
void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) { void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
qWarning() << "searchMethod start: " << static_cast<int>(sm); qWarning() << "searchMethod start: " << static_cast<int>(sm);
if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) { if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) {
FileUtils::searchMethod = sm; FileUtils::searchMethod = sm;
} else { } else {
printf("enum class error!!!\n");
qWarning("enum class error!!!\n"); qWarning("enum class error!!!\n");
} }
if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) { if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) {
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
if(!fifoDir.exists())
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
unlink(UKUI_SEARCH_PIPE_PATH);
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
if(retval == -1) {
qCritical() << "creat fifo error!!";
syslog(LOG_ERR, "creat fifo error!!\n");
assert(false);
return;
}
qDebug() << "create fifo success\n";
qWarning() << "start first index"; qWarning() << "start first index";
// m_fi = FirstIndex("/home/zhangzihao/Desktop");
m_fi.start(); m_fi.start();
qWarning() << "start inotify index"; qWarning() << "start inotify index";
// InotifyIndex ii("/home"); // InotifyIndex ii("/home");
// ii.start(); // ii.start();
this->m_ii = InotifyIndex::getInstance("/home"); // this->m_ii = InotifyIndex::getInstance("/home");
if(!this->m_ii->isRunning()) { // if(!this->m_ii->isRunning()) {
this->m_ii->start(); // this->m_ii->start();
// }
if(!this->m_iw->isRunning()) {
this->m_iw->start();
} }
qDebug() << "Search method has been set to INDEXSEARCH"; qDebug() << "Search method has been set to INDEXSEARCH";
} }
if(FileUtils::SearchMethod::DIRECTSEARCH == sm) {
m_iw->stopWatch();
}
qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod); qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod);
} }

View File

@ -2,15 +2,17 @@
#define SEARCHMETHODMANAGER_H #define SEARCHMETHODMANAGER_H
#include "first-index.h" #include "first-index.h"
#include "inotify-index.h" //#include "inotify-index.h"
#include "inotify-watch.h"
namespace Zeeker { namespace Zeeker {
class SearchMethodManager { class SearchMethodManager {
public: public:
SearchMethodManager() = default; SearchMethodManager();
void searchMethod(FileUtils::SearchMethod sm); void searchMethod(FileUtils::SearchMethod sm);
private: private:
FirstIndex m_fi; FirstIndex m_fi;
InotifyIndex* m_ii; // InotifyIndex* m_ii;
InotifyWatch *m_iw = nullptr;
}; };
} }

View File

@ -42,5 +42,15 @@ void UkuiSearchQDBus::setInotifyMaxUserWatches() {
// sysctl // sysctl
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2"); this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
// /etc/sysctl.conf // /etc/sysctl.conf
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); // this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
}
/**
 * @brief Calls "AddInotifyMaxUserInstance" on the system D-Bus interface and
 *        reports the outcome.
 *
 * The original implementation was declared to return int but never returned
 * a value, which is undefined behaviour as soon as a caller reads the result.
 *
 * @param addNum Forwarded unchanged as the single argument of the D-Bus call.
 * @return The integer carried by the reply when the call succeeded (logged as
 *         the new inotify_max_user_instances value), or -1 when the reply is
 *         invalid.
 */
int UkuiSearchQDBus::addInotifyUserInstances(int addNum)
{
    QDBusReply<int> reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum);
    if(!reply.isValid()) {
        qWarning() << "Call AddInotifyMaxUserInstance failed!";
        // Explicit failure sentinel instead of falling off the end of the function.
        return -1;
    }
    qDebug() << "Set inotify_max_user_instances to" << reply.value();
    return reply.value();
}

View File

@ -21,12 +21,14 @@
#define UKUISEARCHQDBUS_H #define UKUISEARCHQDBUS_H
#include <QDBusInterface> #include <QDBusInterface>
#include <QDBusReply>
namespace Zeeker { namespace Zeeker {
class UkuiSearchQDBus { class UkuiSearchQDBus {
public: public:
UkuiSearchQDBus(); UkuiSearchQDBus();
~UkuiSearchQDBus(); ~UkuiSearchQDBus();
void setInotifyMaxUserWatches(); void setInotifyMaxUserWatches();
int addInotifyUserInstances(int addNum);
private: private:
QDBusInterface* tmpSystemQDBusInterface; QDBusInterface* tmpSystemQDBusInterface;
}; };

View File

@ -26,12 +26,15 @@
#include "file-utils.h" #include "file-utils.h"
#include "global-settings.h" #include "global-settings.h"
#include "plugininterface/search-plugin-iface.h"
#include "plugininterface/data-queue.h"
#include "index/searchmethodmanager.h" #include "index/searchmethodmanager.h"
#include "index/first-index.h" #include "index/first-index.h"
#include "index/ukui-search-qdbus.h" #include "index/ukui-search-qdbus.h"
#include "index/inotify-index.h" #include "index/inotify-index.h"
#include "index/search-manager.h" #include "index/search-manager.h"
namespace Zeeker { namespace Zeeker {
//class LIBSEARCH_EXPORT GlobalSearch { //class LIBSEARCH_EXPORT GlobalSearch {
//public: //public:

View File

@ -24,16 +24,17 @@ DEFINES += PLUGIN_INSTALL_DIRS='\\"$${PLUGIN_INSTALL_DIRS}\\"'
# In order to do so, uncomment the following line. # In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt. # You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(pluginmanage/plugin-manager.pri)
include(plugininterface/plugin-interface.pri)
include(index/index.pri) include(index/index.pri)
include(parser/parser.pri)) include(parser/parser.pri))
include(appsearch/appsearch.pri) include(appsearch/appsearch.pri)
include(settingsearch/settingsearch.pri)) include(settingsearch/settingsearch.pri))
include(plugininterface/plugin-interface.pri)
include(pluginmanage/plugin-manager.pri)
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
LIBS += -lxapian -lquazip5 -luchardet LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc
SOURCES += \ SOURCES += \
file-utils.cpp \ file-utils.cpp \
@ -52,13 +53,13 @@ HEADERS += \
RESOURCES += \ RESOURCES += \
resource1.qrc \ resource1.qrc \
#TRANSLATIONS += \ TRANSLATIONS += \
# ../translations/libsearch/libukui-search_zh_CN.ts ../translations/libukui-search/libukui-search_zh_CN.ts
#qm_files.path = /usr/share/ukui-search/translations/libsearch/ qm_files.path = /usr/share/ukui-search/translations/
#qm_files.files = $$OUT_PWD/.qm/*.qm qm_files.files = $$OUT_PWD/.qm/*.qm
#INSTALLS += qm_files INSTALLS += qm_files
# Default rules for deployment. # Default rules for deployment.
@ -67,11 +68,9 @@ unix {
INSTALLS += target INSTALLS += target
header.path = /usr/include/ukui-search header.path = /usr/include/ukui-search
header.files += *.h index/*.h appsearch/*.h settingsearch/*.h header.files += *.h index/*.h appsearch/*.h settingsearch/*.h plugininterface/*.h
INSTALLS += header INSTALLS += header
} }
INCLUDEPATH += $$PWD/../libchinese-segmentation INCLUDEPATH += $$PWD/../libchinese-segmentation
DEPENDPATH += $$PWD/../libchinese-segmentation DEPENDPATH += $$PWD/../libchinese-segmentation

View File

@ -0,0 +1,8 @@
# Add this directory and each of its sub-module directories to the include
# search path (same entries, same order, as one continued assignment).
INCLUDEPATH += \
    $$PWD \
    $$PWD/plugininterface \
    $$PWD/index \
    $$PWD/parser \
    $$PWD/pluginmanage \
    $$PWD/settingsearch \
    $$PWD/appsearch

Some files were not shown because too many files have changed in this diff Show More