forked from openkylin/ukui-search
Merge branch '0720-newfrontend' into 'new-fronted'
0720 update. See merge request kylin-desktop/ukui-search!65
This commit is contained in:
commit
200a6059c9
|
@ -14,7 +14,6 @@
|
||||||
*.so.*
|
*.so.*
|
||||||
*_pch.h.cpp
|
*_pch.h.cpp
|
||||||
*_resource.rc
|
*_resource.rc
|
||||||
*.qm
|
|
||||||
.#*
|
.#*
|
||||||
*.*#
|
*.*#
|
||||||
core
|
core
|
||||||
|
|
15
README.md
15
README.md
|
@ -1,4 +1,17 @@
|
||||||
# ukui-search
|
# ukui-search
|
||||||
|
|
||||||
[WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment.
|
[dWIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment.
|
||||||
|
|
||||||
|
Build from source
|
||||||
|
|
||||||
|
|
||||||
|
git clone https://github.com/ukui/ukui-search.git
|
||||||
|
|
||||||
|
cd ukui-search && mkdir build && cd build
|
||||||
|
|
||||||
|
qmake .. && make
|
||||||
|
|
||||||
|
sudo make install
|
||||||
|
|
||||||
|
/usr/bin/ukui-search
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ Exec=/usr/bin/ukui-search -s
|
||||||
Type=Application
|
Type=Application
|
||||||
Icon=kylin-search
|
Icon=kylin-search
|
||||||
X-UKUI-AutoRestart=true
|
X-UKUI-AutoRestart=true
|
||||||
|
NoDisplay=true
|
||||||
OnlyShowIn=UKUI
|
OnlyShowIn=UKUI
|
||||||
X-UKUI-Autostart-Phase=Application
|
X-UKUI-Autostart-Phase=Application
|
||||||
Terminal=false
|
Terminal=false
|
||||||
|
|
|
@ -1,3 +1,10 @@
|
||||||
|
ukui-search (0.4.0+0530) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 57129
|
||||||
|
* 任务 无
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Sun, 30 May 2021 11:21:37 +0800
|
||||||
|
|
||||||
ukui-search (0.4.0+0520) v101; urgency=medium
|
ukui-search (0.4.0+0520) v101; urgency=medium
|
||||||
|
|
||||||
* Bug 55034,55545,55326,55496
|
* Bug 55034,55545,55326,55496
|
||||||
|
|
|
@ -130,10 +130,10 @@ void HomePage::createSection(const QString &section_name, const HomePageItemShap
|
||||||
this->appendSection(section);
|
this->appendSection(section);
|
||||||
connect(section, &HomePageSection::requestAction, this, [ = ](const QString &key, const QString &action, const QString &pluginId) {
|
connect(section, &HomePageSection::requestAction, this, [ = ](const QString &key, const QString &action, const QString &pluginId) {
|
||||||
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(pluginId);
|
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(pluginId);
|
||||||
if (plugin) {
|
// if (plugin) {
|
||||||
plugin->openAction(action, key);
|
// plugin->openAction(action, key);
|
||||||
} else {
|
// } else {
|
||||||
qWarning()<<"Get plugin failed!";
|
// qWarning()<<"Get plugin failed!";
|
||||||
}
|
// }
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,11 +27,11 @@ using namespace Zeeker;
|
||||||
#define DETAIL_BACKGROUND_COLOR QColor(0, 0, 0, 0)
|
#define DETAIL_BACKGROUND_COLOR QColor(0, 0, 0, 0)
|
||||||
#define DETAIL_WIDGET_TRANSPARENT 0.04
|
#define DETAIL_WIDGET_TRANSPARENT 0.04
|
||||||
#define DETAIL_WIDGET_BORDER_RADIUS 4
|
#define DETAIL_WIDGET_BORDER_RADIUS 4
|
||||||
#define DETAIL_WIDGET_MARGINS 8,40,40,8
|
#define DETAIL_WIDGET_MARGINS 8,0,8,0
|
||||||
#define DETAIL_FRAME_MARGINS 8,0,8,0
|
#define DETAIL_FRAME_MARGINS 8,0,0,0
|
||||||
#define DETAIL_ICON_HEIGHT 120
|
#define DETAIL_ICON_HEIGHT 120
|
||||||
#define NAME_LABEL_WIDTH 280
|
#define NAME_LABEL_WIDTH 280
|
||||||
#define ICON_SIZE QSize(96, 96)
|
#define ICON_SIZE QSize(120, 120)
|
||||||
#define LINE_STYLE "QFrame{background: rgba(0,0,0,0.2);}"
|
#define LINE_STYLE "QFrame{background: rgba(0,0,0,0.2);}"
|
||||||
#define ACTION_NORMAL_COLOR QColor(55, 144, 250, 255)
|
#define ACTION_NORMAL_COLOR QColor(55, 144, 250, 255)
|
||||||
#define ACTION_HOVER_COLOR QColor(64, 169, 251, 255)
|
#define ACTION_HOVER_COLOR QColor(64, 169, 251, 255)
|
||||||
|
@ -138,8 +138,18 @@ QString escapeHtml(const QString & str) {
|
||||||
|
|
||||||
void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginIface::ResultInfo &info)
|
void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginIface::ResultInfo &info)
|
||||||
{
|
{
|
||||||
m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE)));
|
clearLayout(m_descFrameLyt);
|
||||||
m_iconLabel->show();
|
clearLayout(m_previewFrameLyt);
|
||||||
|
if(SearchPluginManager::getInstance()->getPlugin(plugin_name)->isPreviewEnable(info.actionKey,info.type)) {
|
||||||
|
m_iconLabel->hide();
|
||||||
|
m_previewFrameLyt->addWidget(SearchPluginManager::getInstance()->getPlugin(plugin_name)->previewPage(info.actionKey,info.type, m_previewFrame), 0 , Qt::AlignHCenter);
|
||||||
|
m_previewFrameLyt->setContentsMargins(0,0,0,0);
|
||||||
|
m_previewFrame->show();
|
||||||
|
} else {
|
||||||
|
m_previewFrame->hide();
|
||||||
|
m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE)));
|
||||||
|
m_iconLabel->show();
|
||||||
|
}
|
||||||
QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
|
QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
|
||||||
QString name = fontMetrics.elidedText(info.name, Qt::ElideRight, NAME_LABEL_WIDTH - 8);
|
QString name = fontMetrics.elidedText(info.name, Qt::ElideRight, NAME_LABEL_WIDTH - 8);
|
||||||
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(name)));
|
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(name)));
|
||||||
|
@ -147,6 +157,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
|
||||||
m_pluginLabel->setText(plugin_name);
|
m_pluginLabel->setText(plugin_name);
|
||||||
m_nameFrame->show();
|
m_nameFrame->show();
|
||||||
m_line_1->show();
|
m_line_1->show();
|
||||||
|
|
||||||
if (info.description.length() > 0) {
|
if (info.description.length() > 0) {
|
||||||
//NEW_TODO 样式待优化
|
//NEW_TODO 样式待优化
|
||||||
clearLayout(m_descFrameLyt);
|
clearLayout(m_descFrameLyt);
|
||||||
|
@ -154,7 +165,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
|
||||||
QLabel * descLabel = new QLabel(m_descFrame);
|
QLabel * descLabel = new QLabel(m_descFrame);
|
||||||
descLabel->setTextFormat(Qt::PlainText);
|
descLabel->setTextFormat(Qt::PlainText);
|
||||||
descLabel->setWordWrap(true);
|
descLabel->setWordWrap(true);
|
||||||
QString show_desc = desc.key + ": " + desc.value;
|
QString show_desc = desc.key + " " + desc.value;
|
||||||
descLabel->setText(show_desc);
|
descLabel->setText(show_desc);
|
||||||
m_descFrameLyt->addWidget(descLabel);
|
m_descFrameLyt->addWidget(descLabel);
|
||||||
}
|
}
|
||||||
|
@ -162,8 +173,8 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI
|
||||||
m_line_2->show();
|
m_line_2->show();
|
||||||
}
|
}
|
||||||
clearLayout(m_actionFrameLyt);
|
clearLayout(m_actionFrameLyt);
|
||||||
Q_FOREACH (auto action, info.actionList) {
|
Q_FOREACH (SearchPluginIface::Actioninfo actioninfo, SearchPluginManager::getInstance()->getPlugin(plugin_name)->getActioninfo(info.type)) {
|
||||||
ActionLabel * actionLabel = new ActionLabel(action, info.key, plugin_name, m_actionFrame);
|
ActionLabel * actionLabel = new ActionLabel(actioninfo.displayName, info.actionKey, actioninfo.actionkey, plugin_name, info.type, m_actionFrame);
|
||||||
m_actionFrameLyt->addWidget(actionLabel);
|
m_actionFrameLyt->addWidget(actionLabel);
|
||||||
}
|
}
|
||||||
m_actionFrame->show();
|
m_actionFrame->show();
|
||||||
|
@ -189,6 +200,8 @@ void DetailWidget::initUi()
|
||||||
m_iconLabel = new QLabel(this);
|
m_iconLabel = new QLabel(this);
|
||||||
m_iconLabel->setFixedHeight(DETAIL_ICON_HEIGHT);
|
m_iconLabel->setFixedHeight(DETAIL_ICON_HEIGHT);
|
||||||
m_iconLabel->setAlignment(Qt::AlignCenter);
|
m_iconLabel->setAlignment(Qt::AlignCenter);
|
||||||
|
m_previewFrame = new QFrame(this);
|
||||||
|
m_previewFrameLyt = new QHBoxLayout(m_previewFrame);
|
||||||
|
|
||||||
m_nameFrame = new QFrame(this);
|
m_nameFrame = new QFrame(this);
|
||||||
m_nameFrameLyt = new QHBoxLayout(m_nameFrame);
|
m_nameFrameLyt = new QHBoxLayout(m_nameFrame);
|
||||||
|
@ -222,6 +235,7 @@ void DetailWidget::initUi()
|
||||||
m_actionFrameLyt->setContentsMargins(DETAIL_FRAME_MARGINS);
|
m_actionFrameLyt->setContentsMargins(DETAIL_FRAME_MARGINS);
|
||||||
|
|
||||||
m_mainLyt->addWidget(m_iconLabel);
|
m_mainLyt->addWidget(m_iconLabel);
|
||||||
|
m_mainLyt->addWidget(m_previewFrame, 0, Qt::AlignHCenter);
|
||||||
m_mainLyt->addWidget(m_nameFrame);
|
m_mainLyt->addWidget(m_nameFrame);
|
||||||
m_mainLyt->addWidget(m_line_1);
|
m_mainLyt->addWidget(m_line_1);
|
||||||
m_mainLyt->addWidget(m_descFrame);
|
m_mainLyt->addWidget(m_descFrame);
|
||||||
|
@ -248,7 +262,7 @@ void DetailWidget::paintEvent(QPaintEvent * event)
|
||||||
|
|
||||||
void DetailWidget::clearLayout(QLayout *layout)
|
void DetailWidget::clearLayout(QLayout *layout)
|
||||||
{
|
{
|
||||||
if(! layout) return;
|
if(!layout) return;
|
||||||
QLayoutItem * child;
|
QLayoutItem * child;
|
||||||
while((child = layout->takeAt(0)) != 0) {
|
while((child = layout->takeAt(0)) != 0) {
|
||||||
if(child->widget()) {
|
if(child->widget()) {
|
||||||
|
@ -259,11 +273,13 @@ void DetailWidget::clearLayout(QLayout *layout)
|
||||||
child = NULL;
|
child = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ActionLabel::ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent) : QLabel(parent)
|
ActionLabel::ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type, QWidget *parent) : QLabel(parent)
|
||||||
{
|
{
|
||||||
m_action = action;
|
m_action = action;
|
||||||
m_key = key;
|
m_key = key;
|
||||||
m_plugin = plugin;
|
m_actionKey = ActionKey;
|
||||||
|
m_type = type;
|
||||||
|
m_pluginId = pluginId;
|
||||||
this->initUi();
|
this->initUi();
|
||||||
this->installEventFilter(this);
|
this->installEventFilter(this);
|
||||||
}
|
}
|
||||||
|
@ -287,9 +303,9 @@ bool ActionLabel::eventFilter(QObject *watched, QEvent *event)
|
||||||
this->setForegroundRole(QPalette::Dark);
|
this->setForegroundRole(QPalette::Dark);
|
||||||
return true;
|
return true;
|
||||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||||
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin);
|
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_pluginId);
|
||||||
if (plugin)
|
if (plugin)
|
||||||
plugin->openAction(m_action, m_key);
|
plugin->openAction(m_actionKey, m_key, m_type);
|
||||||
else
|
else
|
||||||
qWarning()<<"Get plugin failed!";
|
qWarning()<<"Get plugin failed!";
|
||||||
this->setForegroundRole(QPalette::Light);
|
this->setForegroundRole(QPalette::Light);
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
#include <QPainter>
|
#include <QPainter>
|
||||||
#include <QStyleOption>
|
#include <QStyleOption>
|
||||||
#include "result-view.h"
|
#include "result-view.h"
|
||||||
#include "plugininterface/search-plugin-iface.h"
|
#include "search-plugin-iface.h"
|
||||||
|
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
class ResultArea : public QScrollArea
|
class ResultArea : public QScrollArea
|
||||||
|
@ -70,6 +70,8 @@ private:
|
||||||
void clearLayout(QLayout *);
|
void clearLayout(QLayout *);
|
||||||
QVBoxLayout * m_mainLyt = nullptr;
|
QVBoxLayout * m_mainLyt = nullptr;
|
||||||
QLabel * m_iconLabel = nullptr;
|
QLabel * m_iconLabel = nullptr;
|
||||||
|
QFrame *m_previewFrame = nullptr;
|
||||||
|
QHBoxLayout *m_previewFrameLyt = nullptr;
|
||||||
QFrame * m_nameFrame = nullptr;
|
QFrame * m_nameFrame = nullptr;
|
||||||
QHBoxLayout * m_nameFrameLyt = nullptr;
|
QHBoxLayout * m_nameFrameLyt = nullptr;
|
||||||
QLabel * m_nameLabel = nullptr;
|
QLabel * m_nameLabel = nullptr;
|
||||||
|
@ -100,13 +102,15 @@ class ActionLabel : public QLabel
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent = nullptr);
|
ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type = 0, QWidget *parent = nullptr);
|
||||||
~ActionLabel() = default;
|
~ActionLabel() = default;
|
||||||
private:
|
private:
|
||||||
void initUi();
|
void initUi();
|
||||||
QString m_action;
|
QString m_action;
|
||||||
QString m_key;
|
QString m_key;
|
||||||
QString m_plugin;
|
int m_actionKey;
|
||||||
|
int m_type = 0;
|
||||||
|
QString m_pluginId;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool eventFilter(QObject *, QEvent *);
|
bool eventFilter(QObject *, QEvent *);
|
||||||
|
|
|
@ -21,8 +21,8 @@
|
||||||
#include "search-page.h"
|
#include "search-page.h"
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
|
|
||||||
#define RESULT_WIDTH 240
|
#define RESULT_WIDTH 266
|
||||||
#define DETAIL_WIDTH 400
|
#define DETAIL_WIDTH 374
|
||||||
|
|
||||||
SearchPage::SearchPage(QWidget *parent) : QWidget(parent)
|
SearchPage::SearchPage(QWidget *parent) : QWidget(parent)
|
||||||
{
|
{
|
||||||
|
@ -54,6 +54,7 @@ void SearchPage::appendPlugin(const QString &plugin_id)
|
||||||
void SearchPage::initUi()
|
void SearchPage::initUi()
|
||||||
{
|
{
|
||||||
m_splitter = new QSplitter(this);
|
m_splitter = new QSplitter(this);
|
||||||
|
m_splitter->setContentsMargins(0, 0, 0, 0);
|
||||||
m_resultArea = new ResultArea(m_splitter);
|
m_resultArea = new ResultArea(m_splitter);
|
||||||
m_detailArea = new DetailArea(m_splitter);
|
m_detailArea = new DetailArea(m_splitter);
|
||||||
m_splitter->addWidget(m_resultArea);
|
m_splitter->addWidget(m_resultArea);
|
||||||
|
|
|
@ -20,13 +20,14 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
||||||
# In order to do so, uncomment the following line.
|
# In order to do so, uncomment the following line.
|
||||||
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
||||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||||
|
include(../libsearch/libukui-search-headers.pri)
|
||||||
include(control/control.pri)
|
include(control/control.pri)
|
||||||
include(model/model.pri)
|
include(model/model.pri)
|
||||||
include(xatom/xatom.pri)
|
include(xatom/xatom.pri)
|
||||||
include(singleapplication/qt-single-application.pri)
|
include(singleapplication/qt-single-application.pri)
|
||||||
include(view/view.pri)
|
include(view/view.pri)
|
||||||
|
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
main.cpp \
|
main.cpp \
|
||||||
mainwindow.cpp
|
mainwindow.cpp
|
||||||
|
|
|
@ -253,6 +253,14 @@ int main(int argc, char *argv[]) {
|
||||||
qDebug() << "Load translations file" << QLocale() << "failed!";
|
qDebug() << "Load translations file" << QLocale() << "failed!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QTranslator lib_translator;
|
||||||
|
try {
|
||||||
|
if(! lib_translator.load("/usr/share/ukui-search/translations/libukui-search_" + QLocale::system().name())) throw - 1;
|
||||||
|
app.installTranslator(&lib_translator);
|
||||||
|
} catch(...) {
|
||||||
|
qDebug() << "Load translations file" << QLocale() << "failed!";
|
||||||
|
}
|
||||||
|
|
||||||
//set main window to the center of screen
|
//set main window to the center of screen
|
||||||
MainWindow *w = new MainWindow;
|
MainWindow *w = new MainWindow;
|
||||||
qApp->setWindowIcon(QIcon::fromTheme("kylin-search"));
|
qApp->setWindowIcon(QIcon::fromTheme("kylin-search"));
|
||||||
|
@ -275,7 +283,7 @@ int main(int argc, char *argv[]) {
|
||||||
QObject::connect(&app, &QtSingleApplication::messageReceived, w, &MainWindow::bootOptionsFilter);
|
QObject::connect(&app, &QtSingleApplication::messageReceived, w, &MainWindow::bootOptionsFilter);
|
||||||
|
|
||||||
// Start app search thread
|
// Start app search thread
|
||||||
AppMatch::getAppMatch()->start();
|
// AppMatch::getAppMatch()->start();
|
||||||
|
|
||||||
// NEW_TODO
|
// NEW_TODO
|
||||||
// Set threads which in global thread pool expiry time in 5ms, some prolems here
|
// Set threads which in global thread pool expiry time in 5ms, some prolems here
|
||||||
|
|
|
@ -37,17 +37,17 @@
|
||||||
#include "qt-single-application.h"
|
#include "qt-single-application.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
|
|
||||||
#define MAIN_MARGINS 16,8,16,16
|
#define MAIN_MARGINS 16,16,16,16
|
||||||
#define TITLE_MARGINS 0,0,0,0
|
#define TITLE_MARGINS 0,0,0,0
|
||||||
#define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
|
#define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
|
||||||
#define SEARCH_METHOD_KEY "indexSearch"
|
#define SEARCH_METHOD_KEY "indexSearch"
|
||||||
#define WEB_ENGINE_KEY "webEngine"
|
#define WEB_ENGINE_KEY "webEngine"
|
||||||
#define WINDOW_WIDTH 640
|
#define WINDOW_WIDTH 680
|
||||||
#define WINDOW_HEIGHT 590
|
#define WINDOW_HEIGHT 600
|
||||||
#define TITLE_HEIGHT 40
|
#define TITLE_HEIGHT 40
|
||||||
#define WINDOW_ICON_SIZE 24
|
#define WINDOW_ICON_SIZE 24
|
||||||
#define SETTING_BTN_SIZE 30
|
#define SETTING_BTN_SIZE 30
|
||||||
#define SEARCH_BAR_SIZE 44
|
#define SEARCH_BAR_SIZE 48
|
||||||
#define ASK_INDEX_TIME 5*1000
|
#define ASK_INDEX_TIME 5*1000
|
||||||
#define RESEARCH_TIME 10*1000
|
#define RESEARCH_TIME 10*1000
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ MainWindow::MainWindow(QWidget *parent) :
|
||||||
//NEW_TODO, register plugins
|
//NEW_TODO, register plugins
|
||||||
// SearchPluginManager::getInstance()->registerPlugin(\\);
|
// SearchPluginManager::getInstance()->registerPlugin(\\);
|
||||||
// m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
|
// m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
|
||||||
m_stackedWidget->setPlugins(QStringList()<<"File"<<"Folder");
|
m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
|
||||||
}
|
}
|
||||||
|
|
||||||
MainWindow::~MainWindow() {
|
MainWindow::~MainWindow() {
|
||||||
|
@ -140,25 +140,25 @@ void MainWindow::initUi() {
|
||||||
mainlayout->setContentsMargins(MAIN_MARGINS);
|
mainlayout->setContentsMargins(MAIN_MARGINS);
|
||||||
m_frame->setLayout(mainlayout);
|
m_frame->setLayout(mainlayout);
|
||||||
|
|
||||||
m_titleFrame = new QFrame(m_frame);//标题栏
|
// m_titleFrame = new QFrame(m_frame);//标题栏
|
||||||
m_titleFrame->setFixedHeight(TITLE_HEIGHT);
|
// m_titleFrame->setFixedHeight(TITLE_HEIGHT);
|
||||||
m_titleLyt = new QHBoxLayout(m_titleFrame);
|
// m_titleLyt = new QHBoxLayout(m_titleFrame);
|
||||||
m_titleLyt->setContentsMargins(TITLE_MARGINS);
|
// m_titleLyt->setContentsMargins(TITLE_MARGINS);
|
||||||
m_iconLabel = new QLabel(m_titleFrame);
|
// m_iconLabel = new QLabel(m_titleFrame);
|
||||||
m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE);
|
// m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE);
|
||||||
m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
|
// m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
|
||||||
m_titleLabel = new QLabel(m_titleFrame);
|
// m_titleLabel = new QLabel(m_titleFrame);
|
||||||
m_titleLabel->setText(tr("Search"));
|
// m_titleLabel->setText(tr("Search"));
|
||||||
m_settingsBtn = new QPushButton(m_titleFrame);
|
// m_settingsBtn = new QPushButton(m_titleFrame);
|
||||||
m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE);
|
// m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE);
|
||||||
m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic"));
|
// m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic"));
|
||||||
m_settingsBtn->setProperty("useIconHighlightEffect", 0x2);
|
// m_settingsBtn->setProperty("useIconHighlightEffect", 0x2);
|
||||||
m_settingsBtn->setProperty("isWindowButton", 0x01);
|
// m_settingsBtn->setProperty("isWindowButton", 0x01);
|
||||||
m_settingsBtn->setFlat(true);
|
// m_settingsBtn->setFlat(true);
|
||||||
m_titleLyt->addWidget(m_iconLabel);
|
// m_titleLyt->addWidget(m_iconLabel);
|
||||||
m_titleLyt->addWidget(m_titleLabel);
|
// m_titleLyt->addWidget(m_titleLabel);
|
||||||
m_titleLyt->addStretch();
|
// m_titleLyt->addStretch();
|
||||||
m_titleLyt->addWidget(m_settingsBtn);
|
// m_titleLyt->addWidget(m_settingsBtn);
|
||||||
m_stackedWidget = new StackedWidget(m_frame);//内容栏
|
m_stackedWidget = new StackedWidget(m_frame);//内容栏
|
||||||
|
|
||||||
m_searchWidget = new SeachBarWidget(this);
|
m_searchWidget = new SeachBarWidget(this);
|
||||||
|
@ -166,9 +166,9 @@ void MainWindow::initUi() {
|
||||||
m_searchWidget->setLayout(m_searchLayout);
|
m_searchWidget->setLayout(m_searchLayout);
|
||||||
m_searchWidget->setFixedHeight(SEARCH_BAR_SIZE);
|
m_searchWidget->setFixedHeight(SEARCH_BAR_SIZE);
|
||||||
|
|
||||||
mainlayout->addWidget(m_titleFrame);
|
// mainlayout->addWidget(m_titleFrame);
|
||||||
mainlayout->addWidget(m_stackedWidget);
|
|
||||||
mainlayout->addWidget(m_searchWidget);
|
mainlayout->addWidget(m_searchWidget);
|
||||||
|
mainlayout->addWidget(m_stackedWidget);
|
||||||
|
|
||||||
//创建索引询问弹窗
|
//创建索引询问弹窗
|
||||||
m_askDialog = new CreateIndexAskDialog(this);
|
m_askDialog = new CreateIndexAskDialog(this);
|
||||||
|
@ -197,9 +197,9 @@ void MainWindow::initConnections()
|
||||||
});
|
});
|
||||||
connect(m_settingsBtn, &QPushButton::clicked, this, &MainWindow::settingsBtnClickedSlot);
|
connect(m_settingsBtn, &QPushButton::clicked, this, &MainWindow::settingsBtnClickedSlot);
|
||||||
//主题改变时,更新自定义标题栏的图标
|
//主题改变时,更新自定义标题栏的图标
|
||||||
connect(qApp, &QApplication::paletteChanged, this, [ = ]() {
|
// connect(qApp, &QApplication::paletteChanged, this, [ = ]() {
|
||||||
m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
|
// m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE)));
|
||||||
});
|
// });
|
||||||
connect(m_searchLayout, &SearchBarHLayout::requestSearchKeyword, this, &MainWindow::searchKeywordSlot);
|
connect(m_searchLayout, &SearchBarHLayout::requestSearchKeyword, this, &MainWindow::searchKeywordSlot);
|
||||||
connect(m_stackedWidget, &StackedWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord);
|
connect(m_stackedWidget, &StackedWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord);
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ using namespace Zeeker;
|
||||||
SearchResultManager::SearchResultManager(const QString& plugin_id, QObject *parent) : QObject(parent)
|
SearchResultManager::SearchResultManager(const QString& plugin_id, QObject *parent) : QObject(parent)
|
||||||
{
|
{
|
||||||
m_plugin_id = plugin_id;
|
m_plugin_id = plugin_id;
|
||||||
m_result_queue = new QQueue<SearchPluginIface::ResultInfo>;
|
m_result_queue = new DataQueue<SearchPluginIface::ResultInfo>;
|
||||||
m_get_result_thread = new ReceiveResultThread(m_result_queue);
|
m_get_result_thread = new ReceiveResultThread(m_result_queue);
|
||||||
initConnections();
|
initConnections();
|
||||||
}
|
}
|
||||||
|
@ -39,59 +39,59 @@ void SearchResultManager::startSearch(const QString &keyword)
|
||||||
}
|
}
|
||||||
m_result_queue->clear();
|
m_result_queue->clear();
|
||||||
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
||||||
// plugin->KeywordSearch(keyword, m_result_queue);
|
plugin->KeywordSearch(keyword, m_result_queue);
|
||||||
/*********************测试用数据*********************/
|
/*********************测试用数据*********************/
|
||||||
SearchPluginIface::ResultInfo test_info;
|
// SearchPluginIface::ResultInfo test_info;
|
||||||
if (m_plugin_id == "File") {
|
// if (m_plugin_id == "File") {
|
||||||
test_info.icon = QIcon::fromTheme("ukui-control-center");
|
// test_info.icon = QIcon::fromTheme("ukui-control-center");
|
||||||
test_info.name = "搜索";
|
// test_info.name = "搜索";
|
||||||
QVector<SearchPluginIface::DescriptionInfo> desc;
|
// QVector<SearchPluginIface::DescriptionInfo> desc;
|
||||||
SearchPluginIface::DescriptionInfo desc_1;
|
// SearchPluginIface::DescriptionInfo desc_1;
|
||||||
desc_1.key = "描述";
|
// desc_1.key = "描述";
|
||||||
desc_1.value = "控制面板搜索插件";
|
// desc_1.value = "控制面板搜索插件";
|
||||||
desc.append(desc_1);
|
// desc.append(desc_1);
|
||||||
QStringList actions;
|
// QStringList actions;
|
||||||
actions.append("打开");
|
// actions.append("打开");
|
||||||
test_info.description = desc;
|
// test_info.description = desc;
|
||||||
test_info.actionList = actions;
|
// test_info.actionList = actions;
|
||||||
m_result_queue->append(test_info);
|
// m_result_queue->append(test_info);
|
||||||
} else {
|
// } else {
|
||||||
test_info.icon = QIcon::fromTheme("unknown");
|
// test_info.icon = QIcon::fromTheme("unknown");
|
||||||
test_info.name = "文件12345abcde.txt";
|
// test_info.name = "文件12345abcde.txt";
|
||||||
QVector<SearchPluginIface::DescriptionInfo> desc;
|
// QVector<SearchPluginIface::DescriptionInfo> desc;
|
||||||
SearchPluginIface::DescriptionInfo desc_1;
|
// SearchPluginIface::DescriptionInfo desc_1;
|
||||||
SearchPluginIface::DescriptionInfo desc_2;
|
// SearchPluginIface::DescriptionInfo desc_2;
|
||||||
desc_1.key = "描述";
|
// desc_1.key = "描述";
|
||||||
desc_1.value = "一个文件";
|
// desc_1.value = "一个文件";
|
||||||
desc_2.key = "路径";
|
// desc_2.key = "路径";
|
||||||
desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt";
|
// desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt";
|
||||||
desc.append(desc_1);
|
// desc.append(desc_1);
|
||||||
desc.append(desc_2);
|
// desc.append(desc_2);
|
||||||
QStringList actions;
|
// QStringList actions;
|
||||||
actions.append("打开");
|
// actions.append("打开");
|
||||||
actions.append("复制路径");
|
// actions.append("复制路径");
|
||||||
test_info.description = desc;
|
// test_info.description = desc;
|
||||||
test_info.actionList = actions;
|
// test_info.actionList = actions;
|
||||||
SearchPluginIface::ResultInfo test_info_1 = test_info;
|
// SearchPluginIface::ResultInfo test_info_1 = test_info;
|
||||||
test_info_1.name = "文件1";
|
// test_info_1.name = "文件1";
|
||||||
SearchPluginIface::ResultInfo test_info_2 = test_info;
|
// SearchPluginIface::ResultInfo test_info_2 = test_info;
|
||||||
test_info_2.name = "文件2";
|
// test_info_2.name = "文件2";
|
||||||
SearchPluginIface::ResultInfo test_info_3 = test_info;
|
// SearchPluginIface::ResultInfo test_info_3 = test_info;
|
||||||
test_info_3.name = "文件3";
|
// test_info_3.name = "文件3";
|
||||||
SearchPluginIface::ResultInfo test_info_4 = test_info;
|
// SearchPluginIface::ResultInfo test_info_4 = test_info;
|
||||||
test_info_4.name = "文件4";
|
// test_info_4.name = "文件4";
|
||||||
SearchPluginIface::ResultInfo test_info_5 = test_info;
|
// SearchPluginIface::ResultInfo test_info_5 = test_info;
|
||||||
test_info_5.name = "文件5";
|
// test_info_5.name = "文件5";
|
||||||
SearchPluginIface::ResultInfo test_info_6 = test_info;
|
// SearchPluginIface::ResultInfo test_info_6 = test_info;
|
||||||
test_info_6.name = "文件6";
|
// test_info_6.name = "文件6";
|
||||||
m_result_queue->append(test_info);
|
// m_result_queue->append(test_info);
|
||||||
m_result_queue->append(test_info_1);
|
// m_result_queue->append(test_info_1);
|
||||||
m_result_queue->append(test_info_2);
|
// m_result_queue->append(test_info_2);
|
||||||
m_result_queue->append(test_info_3);
|
// m_result_queue->append(test_info_3);
|
||||||
m_result_queue->append(test_info_4);
|
// m_result_queue->append(test_info_4);
|
||||||
m_result_queue->append(test_info_5);
|
// m_result_queue->append(test_info_5);
|
||||||
m_result_queue->append(test_info_6);
|
// m_result_queue->append(test_info_6);
|
||||||
}
|
// }
|
||||||
/********************测试用数据********************/
|
/********************测试用数据********************/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ void SearchResultManager::initConnections()
|
||||||
connect(m_get_result_thread, &ReceiveResultThread::gotResultInfo, this, &SearchResultManager::gotResultInfo);
|
connect(m_get_result_thread, &ReceiveResultThread::gotResultInfo, this, &SearchResultManager::gotResultInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReceiveResultThread::ReceiveResultThread(QQueue<SearchPluginIface::ResultInfo> * result_queue, QObject *parent)
|
ReceiveResultThread::ReceiveResultThread(DataQueue<SearchPluginIface::ResultInfo> * result_queue, QObject *parent)
|
||||||
{
|
{
|
||||||
m_result_queue = result_queue;
|
m_result_queue = result_queue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,14 +32,14 @@ namespace Zeeker {
|
||||||
class ReceiveResultThread : public QThread {
|
class ReceiveResultThread : public QThread {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
ReceiveResultThread(QQueue<SearchPluginIface::ResultInfo> * result_queue, QObject * parent = nullptr);
|
ReceiveResultThread(DataQueue<SearchPluginIface::ResultInfo> * result_queue, QObject * parent = nullptr);
|
||||||
~ReceiveResultThread() = default;
|
~ReceiveResultThread() = default;
|
||||||
void stop();
|
void stop();
|
||||||
protected:
|
protected:
|
||||||
void run() override;
|
void run() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QQueue<SearchPluginIface::ResultInfo> * m_result_queue;
|
DataQueue<SearchPluginIface::ResultInfo> * m_result_queue;
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void gotResultInfo(const SearchPluginIface::ResultInfo&);
|
void gotResultInfo(const SearchPluginIface::ResultInfo&);
|
||||||
|
@ -60,7 +60,7 @@ public Q_SLOTS:
|
||||||
private:
|
private:
|
||||||
void initConnections();
|
void initConnections();
|
||||||
QString m_plugin_id;
|
QString m_plugin_id;
|
||||||
QQueue<SearchPluginIface::ResultInfo> * m_result_queue;
|
DataQueue<SearchPluginIface::ResultInfo> * m_result_queue;
|
||||||
ReceiveResultThread * m_get_result_thread = nullptr;
|
ReceiveResultThread * m_get_result_thread = nullptr;
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
|
|
|
@ -126,14 +126,14 @@ const bool &SearchResultModel::isExpanded()
|
||||||
QStringList SearchResultModel::getActions(const QModelIndex &index)
|
QStringList SearchResultModel::getActions(const QModelIndex &index)
|
||||||
{
|
{
|
||||||
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
|
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
|
||||||
return m_item->m_result_info_list.at(index.row()).actionList;
|
// return m_item->m_result_info_list.at(index.row()).actionList;
|
||||||
return QStringList();
|
return QStringList();
|
||||||
}
|
}
|
||||||
|
|
||||||
QString SearchResultModel::getKey(const QModelIndex &index)
|
QString SearchResultModel::getKey(const QModelIndex &index)
|
||||||
{
|
{
|
||||||
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
|
if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0)
|
||||||
return m_item->m_result_info_list.at(index.row()).key;
|
// return m_item->m_result_info_list.at(index.row()).key;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -12,6 +12,13 @@ void ResultViewDelegate::setSearchKeyword(const QString &regFindKeyWords)
|
||||||
m_regFindKeyWords = regFindKeyWords;
|
m_regFindKeyWords = regFindKeyWords;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QSize ResultViewDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const
|
||||||
|
{
|
||||||
|
QSize size = QStyledItemDelegate::sizeHint(option,index);
|
||||||
|
size.setHeight(size.height() + 10);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const {
|
void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const {
|
||||||
QStyleOptionViewItemV4 optionV4 = option;
|
QStyleOptionViewItemV4 optionV4 = option;
|
||||||
initStyleOption(&optionV4, index);
|
initStyleOption(&optionV4, index);
|
||||||
|
@ -30,7 +37,7 @@ void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem &
|
||||||
ctx.palette.setColor(QPalette::Text, optionV4.palette.color(QPalette::Active, QPalette::HighlightedText));
|
ctx.palette.setColor(QPalette::Text, optionV4.palette.color(QPalette::Active, QPalette::HighlightedText));
|
||||||
|
|
||||||
QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4);
|
QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4);
|
||||||
textRect.adjust(0, -5, 0, 0);
|
textRect.adjust(0, 0, 0, 0);
|
||||||
painter->save();
|
painter->save();
|
||||||
painter->translate(textRect.topLeft());
|
painter->translate(textRect.topLeft());
|
||||||
painter->setClipRect(textRect.translated(-textRect.topLeft()));
|
painter->setClipRect(textRect.translated(-textRect.topLeft()));
|
||||||
|
|
|
@ -35,6 +35,8 @@ public:
|
||||||
explicit ResultViewDelegate(QObject *parent = nullptr);
|
explicit ResultViewDelegate(QObject *parent = nullptr);
|
||||||
~ResultViewDelegate() = default;
|
~ResultViewDelegate() = default;
|
||||||
void setSearchKeyword(const QString &);
|
void setSearchKeyword(const QString &);
|
||||||
|
protected:
|
||||||
|
QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const;
|
||||||
private:
|
private:
|
||||||
QString m_regFindKeyWords = 0;
|
QString m_regFindKeyWords = 0;
|
||||||
void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override;
|
void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override;
|
||||||
|
|
|
@ -155,11 +155,11 @@ void ResultView::onRowDoubleClickedSlot(const QModelIndex &index)
|
||||||
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
||||||
try {
|
try {
|
||||||
if (plugin) {
|
if (plugin) {
|
||||||
if (!info.actionList.isEmpty()) {
|
// if (!info.actionList.isEmpty()) {
|
||||||
plugin->openAction(info.actionList.at(0), info.key);
|
// plugin->openAction(info.actionList.at(0), info.key);
|
||||||
} else {
|
// } else {
|
||||||
throw -2;
|
// throw -2;
|
||||||
}
|
// }
|
||||||
} else {
|
} else {
|
||||||
throw -1;
|
throw -1;
|
||||||
}
|
}
|
||||||
|
@ -211,7 +211,7 @@ void ResultView::onMenuTriggered(QAction *action)
|
||||||
//NEW_TODO 接口调整后需要修改
|
//NEW_TODO 接口调整后需要修改
|
||||||
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id);
|
||||||
if (plugin) {
|
if (plugin) {
|
||||||
plugin->openAction(action->text(), m_model->getKey(this->currentIndex()));
|
// plugin->openAction(action->text(), m_model->getKey(this->currentIndex()));
|
||||||
} else {
|
} else {
|
||||||
qWarning()<<"Get plugin failed!";
|
qWarning()<<"Get plugin failed!";
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,12 +30,12 @@ ChineseSegmentation::ChineseSegmentation() {
|
||||||
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
|
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
|
||||||
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
||||||
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
||||||
|
|
||||||
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
||||||
HMM_PATH,
|
HMM_PATH,
|
||||||
USER_DICT_PATH,
|
USER_DICT_PATH,
|
||||||
IDF_PATH,
|
IDF_PATH,
|
||||||
STOP_WORD_PATH);
|
STOP_WORD_PATH,
|
||||||
|
"");
|
||||||
}
|
}
|
||||||
|
|
||||||
ChineseSegmentation::~ChineseSegmentation() {
|
ChineseSegmentation::~ChineseSegmentation() {
|
||||||
|
@ -58,7 +58,7 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
|
||||||
// str.squeeze();
|
// str.squeeze();
|
||||||
|
|
||||||
const size_t topk = -1;
|
const size_t topk = -1;
|
||||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
std::vector<cppjieba::KeyWord> keywordres;
|
||||||
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
||||||
std::string().swap(s);
|
std::string().swap(s);
|
||||||
QVector<SKeyWord> vecNeeds;
|
QVector<SKeyWord> vecNeeds;
|
||||||
|
@ -66,13 +66,20 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
|
||||||
|
|
||||||
keywordres.clear();
|
keywordres.clear();
|
||||||
// keywordres.shrink_to_fit();
|
// keywordres.shrink_to_fit();
|
||||||
|
|
||||||
|
|
||||||
return vecNeeds;
|
return vecNeeds;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw) {
|
std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
|
||||||
|
|
||||||
|
const size_t topk = -1;
|
||||||
|
std::vector<cppjieba::KeyWord> keywordres;
|
||||||
|
ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
|
||||||
|
|
||||||
|
return keywordres;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
|
||||||
for(auto i : keywordres) {
|
for(auto i : keywordres) {
|
||||||
SKeyWord temp;
|
SKeyWord temp;
|
||||||
temp.word = i.word;
|
temp.word = i.word;
|
||||||
|
|
|
@ -48,7 +48,10 @@ public:
|
||||||
static ChineseSegmentation *getInstance();
|
static ChineseSegmentation *getInstance();
|
||||||
~ChineseSegmentation();
|
~ChineseSegmentation();
|
||||||
QVector<SKeyWord> callSegement(std::string s);
|
QVector<SKeyWord> callSegement(std::string s);
|
||||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
|
//Added callSegementStd, which returns a std::vector<cppjieba::KeyWord> directly and simplifies the internal processing --jxx20210517
|
||||||
|
//Changed the parameter to a const reference and removed the QString <-> std::string conversion code --jxx20210519
|
||||||
|
std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
|
||||||
|
void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
|
||||||
private:
|
private:
|
||||||
static QMutex m_mutex;
|
static QMutex m_mutex;
|
||||||
cppjieba::Jieba *m_jieba;
|
cppjieba::Jieba *m_jieba;
|
||||||
|
|
|
@ -0,0 +1,511 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <QDebug>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "limonp/Md5.hpp"
|
||||||
|
#include "Unicode.hpp"
|
||||||
|
#include "darts.h"
|
||||||
|
|
||||||
|
namespace cppjieba {
|
||||||
|
|
||||||
|
using std::pair;
|
||||||
|
|
||||||
|
// One dictionary entry as loaded from a dictionary file, before it is packed
// into the double-array cache.
struct DatElement {
    std::string word;
    std::string tag;
    double weight = 0;

    // Ordering used when sorting entries: ascending by word; entries with the
    // same word are ordered by descending weight.
    bool operator < (const DatElement & b) const {
        if (word != b.word) {
            return word < b.word;
        }

        return weight > b.weight;
    }
};
|
||||||
|
|
||||||
|
// One word/IDF pair, before it is packed into the double-array cache.
struct IdfElement {
    std::string word;
    double idf = 0;

    // Ordering used when sorting entries: ascending by word; entries with the
    // same word are ordered by descending idf.
    bool operator < (const IdfElement & b) const {
        if (word != b.word) {
            return word < b.word;
        }

        return idf > b.idf;
    }
};
|
||||||
|
|
||||||
|
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
|
||||||
|
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fixed-size record stored in the cache file for every dictionary word.
// Its size is pinned to 16 bytes by a static_assert elsewhere in this file.
struct DatMemElem {
    double weight = 0.0;
    char tag[8] = {};   // always NUL-terminated, so at most 7 payload bytes

    // Stores str in the tag buffer, truncating it to sizeof(tag) - 1 bytes.
    void SetTag(const std::string & str) {
        const size_t copy_len = std::min(str.size(), sizeof(tag) - 1);
        memset(tag, 0, sizeof(tag));
        strncpy(tag, str.c_str(), copy_len);
    }

    // Returns the stored tag as a string.
    std::string GetTag() const {
        return std::string(tag);
    }
};
|
||||||
|
|
||||||
|
inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
|
||||||
|
return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DatDag {
|
||||||
|
limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
|
||||||
|
double max_weight;
|
||||||
|
int max_next;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef Darts::DoubleArray JiebaDAT;
|
||||||
|
|
||||||
|
|
||||||
|
// Header written at the start of a cache file by BuildDatCache and read back
// by InitAttachDat / InitIdfAttachDat. The member order and types define the
// on-disk layout, so they must not be changed.
struct CacheFileHeader {
    char md5_hex[32]{};        // md5 of the source dictionaries, not NUL-terminated
    double min_weight{0};      // DatTrie::min_weight_ at build time
    uint32_t elements_num{0};  // number of element records following the header
    uint32_t dat_size{0};      // dat_.size() of the double array following the records
};
|
||||||
|
|
||||||
|
static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
|
||||||
|
static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
|
||||||
|
|
||||||
|
|
||||||
|
class DatTrie {
|
||||||
|
public:
|
||||||
|
DatTrie() {}
|
||||||
|
// Releases the cache mapping and its file descriptor, if any.
~DatTrie() {
    // A trie that was never attached has mmap_addr_ == nullptr and
    // mmap_fd_ == -1; do not hand those (or MAP_FAILED) to the kernel.
    if ((nullptr != mmap_addr_) && (MAP_FAILED != mmap_addr_)) {
        ::munmap(mmap_addr_, mmap_length_);
    }

    mmap_addr_ = nullptr;
    mmap_length_ = 0;

    if (mmap_fd_ >= 0) {
        ::close(mmap_fd_);
    }

    mmap_fd_ = -1;
}
|
||||||
|
|
||||||
|
// Exact-match lookup of key in the double array.
// Returns the element record for key, or nullptr when key is not present or
// the stored value is outside [0, elements_num_).
const DatMemElem * Find(const string & key) const {
    JiebaDAT::result_pair_type hit;
    dat_.exactMatchSearch(key.c_str(), hit);

    const bool found = (0 != hit.length)
                       && (hit.value >= 0)
                       && ((size_t)hit.value < elements_num_);

    return found ? &elements_ptr_[ hit.value ] : nullptr;
}
|
||||||
|
|
||||||
|
const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
|
||||||
|
JiebaDAT::result_pair_type find_result;
|
||||||
|
dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
|
||||||
|
|
||||||
|
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return idf_elements_ptr_[ find_result.value ];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the forward DAG for the runes in [begin, end).
// res is cleared and resized to one node per rune. For rune i, nexts[0] is
// (i + 1, record of the single-rune word or nullptr) and every further entry
// is (i + char_num, record) for a dictionary word of char_num runes starting
// at i. Words longer than max_word_len runes are ignored.
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
          vector<struct DatDag>&res, size_t max_word_len) const {

    res.clear();
    res.resize(end - begin);

    string text_str;
    EncodeRunesToString(begin, end, text_str);

    static const size_t max_num = 128;
    JiebaDAT::result_pair_type result_pairs[max_num] = {};

    for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {

        std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);

        // The search reports the total number of matches, which may exceed
        // the capacity of result_pairs; only the first max_num are stored.
        if (num_results > max_num) {
            num_results = max_num;
        }

        res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));

        for (std::size_t idx = 0; idx < num_results; ++idx) {
            auto & match = result_pairs[idx];

            if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                continue;
            }

            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);

            if (char_num > max_word_len) {
                continue;
            }

            auto pValue = &elements_ptr_[match.value];

            if (1 == char_num) {
                // Single-rune word: fill in the placeholder pushed above.
                res[i].nexts[0].second = pValue;
                continue;
            }

            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
        }

        // Advance to the byte offset of the next rune.
        begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
    }
}
|
||||||
|
|
||||||
|
// Builds the same DAG as Find(), but walks the runes from the last to the
// first. res is cleared and resized to one node per rune; node layout is
// identical to the forward variant.
void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
                  vector<struct DatDag>&res, size_t max_word_len) const {

    res.clear();
    res.resize(end - begin);

    string text_str;
    EncodeRunesToString(begin, end, text_str);

    static const size_t max_num = 128;
    JiebaDAT::result_pair_type result_pairs[max_num] = {};

    size_t str_size = end - begin;
    for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
        const size_t pos = str_size - i - 1;   // rune index handled in this pass

        // Step back to the byte offset at which rune `pos` starts.
        begin_pos -= (end - i - 1)->len;
        std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);

        // The search reports the total number of matches, which may exceed
        // the capacity of result_pairs; only the first max_num are stored.
        if (num_results > max_num) {
            num_results = max_num;
        }

        res[pos].nexts.push_back(pair<size_t, const DatMemElem *>(pos + 1, nullptr));

        for (std::size_t idx = 0; idx < num_results; ++idx) {
            auto & match = result_pairs[idx];
            if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                continue;
            }

            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);

            if (char_num > max_word_len) {
                continue;
            }

            auto pValue = &elements_ptr_[match.value];

            if (1 == char_num) {
                // Single-rune word: fill in the placeholder pushed above.
                res[pos].nexts[0].second = pValue;
                continue;
            }

            res[pos].nexts.push_back(pair<size_t, const DatMemElem *>(pos + char_num, pValue));
        }
    }
}
|
||||||
|
// Segments the runes in [begin, end) and appends the resulting word ranges to
// words. Works backwards from the last rune: for every position it keeps the
// best accumulated weight of the suffix (max_weight) and the index at which
// the chosen word ends (max_next), then walks max_next from the front.
// Dictionary words longer than max_word_len runes are ignored.
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
          vector<WordRange>& words, size_t max_word_len) const {

    string text_str;
    EncodeRunesToString(begin, end, text_str);

    static const size_t max_num = 128;
    JiebaDAT::result_pair_type result_pairs[max_num] = {};  // dictionary lookup results
    const size_t str_size = end - begin;

    // Heap-backed instead of variable-length arrays: VLAs are not standard
    // C++ and put the whole table on the stack for long inputs.
    vector<double> max_weight(str_size, -3.14e+100);  // best weight of the suffix starting at each rune
    // Every entry starts at -1. The former memset(max_next, -1, str_size)
    // only cleared str_size bytes, i.e. a quarter of the int array.
    vector<int> max_next(str_size, -1);               // chosen end index (exclusive) per rune

    // Offers "word [pos, next) with weight val" as a candidate for pos and
    // keeps it when it beats the best candidate seen so far.
    auto relax = [&](size_t pos, size_t next, double val) {
        if (next > str_size) {
            return;
        }

        if (next < str_size) {
            val += max_weight[next];
        }

        if (val > max_weight[pos]) {
            max_weight[pos] = val;
            max_next[pos] = static_cast<int>(next);
        }
    };

    for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
        const size_t pos = str_size - i - 1;  // processed back to front
        begin_pos -= (end - i - 1)->len;

        std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);

        // The search reports the total number of matches, which may exceed
        // the capacity of result_pairs; only the first max_num are stored.
        if (num_results > max_num) {
            num_results = max_num;
        }

        for (std::size_t idx = 0; idx < num_results; ++idx) {
            auto & match = result_pairs[idx];
            if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                continue;
            }

            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
            if (char_num > max_word_len) {
                continue;
            }

            relax(pos, pos + char_num, elements_ptr_[match.value].weight);
        }

        // No usable dictionary word starts here (no match at all, or every
        // match was skipped above): emit the rune on its own with the
        // minimum weight so max_next[pos] is always defined.
        if (max_next[pos] < 0) {
            relax(pos, pos + 1, min_weight_);
        }
    }

    for (size_t i = 0; i < str_size;) {  // collect the chosen path
        const int next = max_next[i];
        assert(next > 0 && static_cast<size_t>(next) > i);
        assert(static_cast<size_t>(next) <= str_size);
        WordRange wr(begin + i, begin + next - 1);
        words.push_back(wr);
        i = static_cast<size_t>(next);
    }
}
|
||||||
|
double GetMinWeight() const {
|
||||||
|
return min_weight_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetMinWeight(double d) {
|
||||||
|
min_weight_ = d ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes a fresh cache file for elements (which get sorted in place) and then
// attaches to it. Returns the result of the attach step.
bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
    BuildDatCache(elements, dat_cache_file, md5);

    const bool attached = InitAttachDat(dat_cache_file, md5);
    return attached;
}
|
||||||
|
|
||||||
|
// Writes a fresh IDF cache file for elements (which get sorted in place) and
// then attaches to it. Returns the result of the attach step.
bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    BuildDatCache(elements, dat_cache_file, md5);

    const bool attached = InitIdfAttachDat(dat_cache_file, md5);
    return attached;
}
|
||||||
|
|
||||||
|
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
|
||||||
|
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
||||||
|
|
||||||
|
if (mmap_fd_ < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
|
||||||
|
assert(seek_off >= 0);
|
||||||
|
mmap_length_ = seek_off;
|
||||||
|
|
||||||
|
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
|
||||||
|
assert(MAP_FAILED != mmap_addr_);
|
||||||
|
|
||||||
|
assert(mmap_length_ >= sizeof(CacheFileHeader));
|
||||||
|
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
|
||||||
|
elements_num_ = header.elements_num;
|
||||||
|
min_weight_ = header.min_weight;
|
||||||
|
assert(sizeof(header.md5_hex) == md5.size());
|
||||||
|
|
||||||
|
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem) + header.dat_size * dat_.unit_size());
|
||||||
|
elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
|
||||||
|
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
|
||||||
|
dat_.set_array(dat_ptr, header.dat_size);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
|
||||||
|
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
||||||
|
|
||||||
|
if (mmap_fd_ < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
|
||||||
|
assert(seek_off >= 0);
|
||||||
|
mmap_length_ = seek_off;
|
||||||
|
|
||||||
|
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
|
||||||
|
assert(MAP_FAILED != mmap_addr_);
|
||||||
|
|
||||||
|
assert(mmap_length_ >= sizeof(CacheFileHeader));
|
||||||
|
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
|
||||||
|
elements_num_ = header.elements_num;
|
||||||
|
min_weight_ = header.min_weight;
|
||||||
|
assert(sizeof(header.md5_hex) == md5.size());
|
||||||
|
|
||||||
|
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size());
|
||||||
|
idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
|
||||||
|
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
|
||||||
|
dat_.set_array(dat_ptr, header.dat_size);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Sorts elements, builds the double array from their words and writes
// header + DatMemElem records + double array to dat_cache_file. The data is
// written to a mkstemp() file next to the target and renamed into place, so
// readers never observe a half-written cache. The function returns void: on
// any failure it logs, removes the temporary file and returns, and the
// following attach step reports the failure to the caller.
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
    std::sort(elements.begin(), elements.end());

    vector<const char*> keys_ptr_vec;
    vector<int> values_vec;
    vector<DatMemElem> mem_elem_vec;

    keys_ptr_vec.reserve(elements.size());
    values_vec.reserve(elements.size());
    mem_elem_vec.reserve(elements.size());

    CacheFileHeader header;
    header.min_weight = min_weight_;
    assert(sizeof(header.md5_hex) == md5.size());
    // Bounded copy: never write past md5_hex even if the assert is compiled out.
    memcpy(&header.md5_hex[0], md5.c_str(), std::min(md5.size(), sizeof(header.md5_hex)));

    for (size_t i = 0; i < elements.size(); ++i) {
        keys_ptr_vec.push_back(elements[i].word.data());
        values_vec.push_back(i);   // value stored in the double array = index of the record
        mem_elem_vec.push_back(DatMemElem());
        auto & mem_elem = mem_elem_vec.back();
        mem_elem.weight = elements[i].weight;
        mem_elem.SetTag(elements[i].tag);
    }

    // data() instead of &v[0]: the latter is undefined for an empty vector.
    auto const ret = dat_.build(keys_ptr_vec.size(), keys_ptr_vec.data(), NULL, values_vec.data());

    if (0 != ret) {
        qDebug() << "build double array failed:" << ret;
        return;
    }

    header.elements_num = mem_elem_vec.size();
    header.dat_size = dat_.size();

    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";

    // mkstemp() rewrites the XXXXXX suffix in place; &s[0] is the writable
    // pointer to the string's buffer. The umask is process-wide, so it is
    // restored right after the file has been created.
    const mode_t old_mask = ::umask(S_IWGRP | S_IWOTH);
    const int fd = ::mkstemp(&tmp_filepath[0]);
    const int mkstemp_errno = errno;
    ::umask(old_mask);

    if (fd < 0) {
        qDebug() << "mkstemp error:" << mkstemp_errno << tmp_filepath.data();
        return;
    }

    ::fchmod(fd, 0644);

    // write() may transfer fewer bytes than requested; loop until done.
    auto write_all = [fd](const void * buf, size_t len) -> bool {
        const char * cursor = static_cast<const char *>(buf);

        while (len > 0) {
            const ssize_t written = ::write(fd, cursor, len);

            if (written < 0 && errno == EINTR) {
                continue;
            }

            if (written <= 0) {
                return false;
            }

            cursor += written;
            len -= static_cast<size_t>(written);
        }

        return true;
    };

    const bool write_ok = write_all(&header, sizeof(header))
                          && write_all(mem_elem_vec.data(), sizeof(DatMemElem) * mem_elem_vec.size())
                          && write_all(dat_.array(), dat_.total_size());
    ::close(fd);

    if (!write_ok || (0 != ::rename(tmp_filepath.c_str(), dat_cache_file.c_str()))) {
        qDebug() << "write dat cache failed:" << errno << tmp_filepath.data();
        ::unlink(tmp_filepath.c_str());   // do not leave the temporary file behind
    }
}
|
||||||
|
|
||||||
|
// Sorts elements, builds the double array from their words and writes
// header + idf values (one double per element) + double array to
// dat_cache_file. The data is written to a mkstemp() file next to the target
// and renamed into place, so readers never observe a half-written cache. The
// function returns void: on any failure it logs, removes the temporary file
// and returns, and the following attach step reports the failure.
void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    std::sort(elements.begin(), elements.end());

    vector<const char*> keys_ptr_vec;
    vector<int> values_vec;
    vector<double> mem_elem_vec;

    keys_ptr_vec.reserve(elements.size());
    values_vec.reserve(elements.size());
    mem_elem_vec.reserve(elements.size());

    CacheFileHeader header;
    header.min_weight = min_weight_;
    assert(sizeof(header.md5_hex) == md5.size());
    // Bounded copy: never write past md5_hex even if the assert is compiled out.
    memcpy(&header.md5_hex[0], md5.c_str(), std::min(md5.size(), sizeof(header.md5_hex)));

    for (size_t i = 0; i < elements.size(); ++i) {
        keys_ptr_vec.push_back(elements[i].word.data());
        values_vec.push_back(i);   // value stored in the double array = index of the idf
        mem_elem_vec.push_back(elements[i].idf);
    }

    // data() instead of &v[0]: the latter is undefined for an empty vector.
    auto const ret = dat_.build(keys_ptr_vec.size(), keys_ptr_vec.data(), NULL, values_vec.data());

    if (0 != ret) {
        qDebug() << "build double array failed:" << ret;
        return;
    }

    header.elements_num = mem_elem_vec.size();
    header.dat_size = dat_.size();

    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";

    // mkstemp() rewrites the XXXXXX suffix in place; &s[0] is the writable
    // pointer to the string's buffer. The umask is process-wide, so it is
    // restored right after the file has been created.
    const mode_t old_mask = ::umask(S_IWGRP | S_IWOTH);
    const int fd = ::mkstemp(&tmp_filepath[0]);
    const int mkstemp_errno = errno;
    ::umask(old_mask);

    if (fd < 0) {
        qDebug() << "mkstemp error:" << mkstemp_errno << tmp_filepath.data();
        return;
    }

    ::fchmod(fd, 0644);

    // write() may transfer fewer bytes than requested; loop until done.
    auto write_all = [fd](const void * buf, size_t len) -> bool {
        const char * cursor = static_cast<const char *>(buf);

        while (len > 0) {
            const ssize_t written = ::write(fd, cursor, len);

            if (written < 0 && errno == EINTR) {
                continue;
            }

            if (written <= 0) {
                return false;
            }

            cursor += written;
            len -= static_cast<size_t>(written);
        }

        return true;
    };

    const bool write_ok = write_all(&header, sizeof(header))
                          && write_all(mem_elem_vec.data(), sizeof(double) * mem_elem_vec.size())
                          && write_all(dat_.array(), dat_.total_size());
    ::close(fd);

    if (!write_ok || (0 != ::rename(tmp_filepath.c_str(), dat_cache_file.c_str()))) {
        qDebug() << "write dat cache failed:" << errno << tmp_filepath.data();
        ::unlink(tmp_filepath.c_str());   // do not leave the temporary file behind
    }
}
|
||||||
|
|
||||||
|
DatTrie(const DatTrie &);
|
||||||
|
DatTrie &operator=(const DatTrie &);
|
||||||
|
|
||||||
|
private:
|
||||||
|
JiebaDAT dat_;
|
||||||
|
const DatMemElem * elements_ptr_ = nullptr;
|
||||||
|
const double * idf_elements_ptr_= nullptr;
|
||||||
|
size_t elements_num_ = 0;
|
||||||
|
double min_weight_ = 0;
|
||||||
|
|
||||||
|
int mmap_fd_ = -1;
|
||||||
|
size_t mmap_length_ = 0;
|
||||||
|
char * mmap_addr_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Computes one md5 over the contents of all files named in files_list
// (paths separated by '|' or ';') and returns md5.digestChars as a string.
// file_size_sum receives the total number of bytes hashed. Files that cannot
// be opened or mapped, and empty files, contribute nothing.
inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
    limonp::MD5 md5;

    const auto files = limonp::Split(files_list, "|;");
    file_size_sum = 0;

    for (auto const & local_path : files) {
        const int fd = ::open(local_path.c_str(), O_RDONLY);
        if (fd < 0) {
            continue;
        }

        auto const len = ::lseek(fd, 0, SEEK_END);
        if (len > 0) {
            void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

            // Checked at run time: with only an assert, a failed mapping
            // would be dereferenced by md5.Update() in release builds.
            if (MAP_FAILED != addr) {
                md5.Update((unsigned char *) addr, len);
                file_size_sum += len;

                ::munmap(addr, len);
            }
        }
        ::close(fd);
    }

    md5.Final();
    return string(md5.digestChars);
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_DICT_TRIE_HPP
|
|
||||||
#define CPPJIEBA_DICT_TRIE_HPP
|
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
@ -31,8 +12,8 @@
|
||||||
#include "limonp/StringUtil.hpp"
|
#include "limonp/StringUtil.hpp"
|
||||||
#include "limonp/Logging.hpp"
|
#include "limonp/Logging.hpp"
|
||||||
#include "Unicode.hpp"
|
#include "Unicode.hpp"
|
||||||
#include "Trie.hpp"
|
#include "DatTrie.hpp"
|
||||||
|
#include <QDebug>
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
using namespace limonp;
|
using namespace limonp;
|
||||||
|
@ -50,58 +31,29 @@ public:
|
||||||
WordWeightMax,
|
WordWeightMax,
|
||||||
}; // enum UserWordWeightOption
|
}; // enum UserWordWeightOption
|
||||||
|
|
||||||
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
|
||||||
Init(dict_path, user_dict_paths, user_word_weight_opt);
|
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||||
|
Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
~DictTrie() {
|
~DictTrie() {}
|
||||||
delete trie_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
const DatMemElem* Find(const string & word) const {
|
||||||
DictUnit node_info;
|
return dat_.Find(word);
|
||||||
if(!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
active_node_infos_.push_back(node_info);
|
|
||||||
trie_->InsertNode(node_info.word, &active_node_infos_.back());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
|
|
||||||
DictUnit node_info;
|
|
||||||
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
|
|
||||||
if(!MakeNodeInfo(node_info, word, weight, tag)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
active_node_infos_.push_back(node_info);
|
|
||||||
trie_->InsertNode(node_info.word, &active_node_infos_.back());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
|
||||||
return trie_->Find(begin, end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Find(RuneStrArray::const_iterator begin,
|
void Find(RuneStrArray::const_iterator begin,
|
||||||
RuneStrArray::const_iterator end,
|
RuneStrArray::const_iterator end,
|
||||||
vector<struct Dag>&res,
|
vector<struct DatDag>&res,
|
||||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
trie_->Find(begin, end, res, max_word_len);
|
dat_.Find(begin, end, res, max_word_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Find(const string& word) {
|
void Find(RuneStrArray::const_iterator begin,
|
||||||
const DictUnit *tmp = NULL;
|
RuneStrArray::const_iterator end,
|
||||||
RuneStrArray runes;
|
vector<WordRange>& words,
|
||||||
if(!DecodeRunesInString(word, runes)) {
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
XLOG(ERROR) << "Decode failed.";
|
dat_.Find(begin, end, words, max_word_len);
|
||||||
}
|
|
||||||
tmp = Find(runes.begin(), runes.end());
|
|
||||||
if(tmp == NULL) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||||
|
@ -109,182 +61,176 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
double GetMinWeight() const {
|
double GetMinWeight() const {
|
||||||
return min_weight_;
|
return dat_.GetMinWeight();
|
||||||
}
|
}
|
||||||
|
|
||||||
void InserUserDictNode(const string& line) {
|
size_t GetTotalDictSize() const {
|
||||||
|
return total_dict_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
|
||||||
vector<string> buf;
|
vector<string> buf;
|
||||||
DictUnit node_info;
|
DatElement node_info;
|
||||||
Split(line, buf, " ");
|
Split(line, buf, " ");
|
||||||
if(buf.size() == 1) {
|
|
||||||
MakeNodeInfo(node_info,
|
if (buf.size() == 0) {
|
||||||
buf[0],
|
return;
|
||||||
user_word_default_weight_,
|
|
||||||
UNKNOWN_TAG);
|
|
||||||
} else if(buf.size() == 2) {
|
|
||||||
MakeNodeInfo(node_info,
|
|
||||||
buf[0],
|
|
||||||
user_word_default_weight_,
|
|
||||||
buf[1]);
|
|
||||||
} else if(buf.size() == 3) {
|
|
||||||
int freq = atoi(buf[1].c_str());
|
|
||||||
assert(freq_sum_ > 0.0);
|
|
||||||
double weight = log(1.0 * freq / freq_sum_);
|
|
||||||
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
|
|
||||||
}
|
}
|
||||||
static_node_infos_.push_back(node_info);
|
|
||||||
if(node_info.word.size() == 1) {
|
node_info.word = buf[0];
|
||||||
user_dict_single_chinese_word_.insert(node_info.word[0]);
|
node_info.weight = user_word_default_weight_;
|
||||||
|
node_info.tag = UNKNOWN_TAG;
|
||||||
|
|
||||||
|
if (buf.size() == 2) {
|
||||||
|
node_info.tag = buf[1];
|
||||||
|
} else if (buf.size() == 3) {
|
||||||
|
if (freq_sum_ > 0.0) {
|
||||||
|
const int freq = atoi(buf[1].c_str());
|
||||||
|
node_info.weight = log(1.0 * freq / freq_sum_);
|
||||||
|
node_info.tag = buf[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (saveNodeInfo) {
|
||||||
|
static_node_infos_.push_back(node_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Utf8CharNum(node_info.word) == 1) {
|
||||||
|
RuneArray word;
|
||||||
|
|
||||||
|
if (DecodeRunesInString(node_info.word, word)) {
|
||||||
|
user_dict_single_chinese_word_.insert(word[0]);
|
||||||
|
} else {
|
||||||
|
XLOG(ERROR) << "Decode " << node_info.word << " failed.";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoadUserDict(const vector<string>& buf) {
|
void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
|
||||||
for(size_t i = 0; i < buf.size(); i++) {
|
|
||||||
InserUserDictNode(buf[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadUserDict(const set<string>& buf) {
|
|
||||||
std::set<string>::const_iterator iter;
|
|
||||||
for(iter = buf.begin(); iter != buf.end(); iter++) {
|
|
||||||
InserUserDictNode(*iter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadUserDict(const string& filePaths) {
|
|
||||||
vector<string> files = limonp::Split(filePaths, "|;");
|
vector<string> files = limonp::Split(filePaths, "|;");
|
||||||
size_t lineno = 0;
|
|
||||||
for(size_t i = 0; i < files.size(); i++) {
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
ifstream ifs(files[i].c_str());
|
ifstream ifs(files[i].c_str());
|
||||||
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
||||||
string line;
|
string line;
|
||||||
|
|
||||||
for(; getline(ifs, line); lineno++) {
|
for (; getline(ifs, line);) {
|
||||||
if(line.size() == 0) {
|
if (line.size() == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
InserUserDictNode(line);
|
|
||||||
|
InserUserDictNode(line, saveNodeInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
|
void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
|
||||||
LoadDict(dict_path);
|
UserWordWeightOption user_word_weight_opt) {
|
||||||
|
const auto dict_list = dict_path + "|" + user_dict_paths;
|
||||||
|
size_t file_size_sum = 0;
|
||||||
|
const string md5 = CalcFileListMD5(dict_list, file_size_sum);
|
||||||
|
|
||||||
|
if (dat_cache_path.empty()) {
|
||||||
|
//未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
|
||||||
|
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
||||||
|
}
|
||||||
|
QString path = QString::fromStdString(dat_cache_path);
|
||||||
|
qDebug() << "#########Dict path:" << path;
|
||||||
|
if (dat_.InitAttachDat(dat_cache_path, md5)) {
|
||||||
|
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
|
||||||
|
total_dict_size_ = file_size_sum;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LoadDefaultDict(dict_path);
|
||||||
freq_sum_ = CalcFreqSum(static_node_infos_);
|
freq_sum_ = CalcFreqSum(static_node_infos_);
|
||||||
CalculateWeight(static_node_infos_, freq_sum_);
|
CalculateWeight(static_node_infos_, freq_sum_);
|
||||||
SetStaticWordWeights(user_word_weight_opt);
|
double min_weight = 0;
|
||||||
|
SetStaticWordWeights(user_word_weight_opt, min_weight);
|
||||||
|
dat_.SetMinWeight(min_weight);
|
||||||
|
|
||||||
if(user_dict_paths.size()) {
|
LoadUserDict(user_dict_paths);
|
||||||
LoadUserDict(user_dict_paths);
|
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
|
||||||
}
|
assert(build_ret);
|
||||||
Shrink(static_node_infos_);
|
total_dict_size_ = file_size_sum;
|
||||||
CreateTrie(static_node_infos_);
|
vector<DatElement>().swap(static_node_infos_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CreateTrie(const vector<DictUnit>& dictUnits) {
|
void LoadDefaultDict(const string& filePath) {
|
||||||
assert(dictUnits.size());
|
|
||||||
vector<Unicode> words;
|
|
||||||
vector<const DictUnit*> valuePointers;
|
|
||||||
for(size_t i = 0 ; i < dictUnits.size(); i ++) {
|
|
||||||
words.push_back(dictUnits[i].word);
|
|
||||||
valuePointers.push_back(&dictUnits[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
trie_ = new Trie(words, valuePointers);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool MakeNodeInfo(DictUnit& node_info,
|
|
||||||
const string& word,
|
|
||||||
double weight,
|
|
||||||
const string& tag) {
|
|
||||||
if(!DecodeRunesInString(word, node_info.word)) {
|
|
||||||
XLOG(ERROR) << "Decode " << word << " failed.";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
node_info.weight = weight;
|
|
||||||
node_info.tag = tag;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadDict(const string& filePath) {
|
|
||||||
ifstream ifs(filePath.c_str());
|
ifstream ifs(filePath.c_str());
|
||||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
||||||
string line;
|
string line;
|
||||||
vector<string> buf;
|
vector<string> buf;
|
||||||
|
|
||||||
DictUnit node_info;
|
for (; getline(ifs, line);) {
|
||||||
for(size_t lineno = 0; getline(ifs, line); lineno++) {
|
|
||||||
Split(line, buf, " ");
|
Split(line, buf, " ");
|
||||||
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
|
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
|
||||||
MakeNodeInfo(node_info,
|
DatElement node_info;
|
||||||
buf[0],
|
node_info.word = buf[0];
|
||||||
atof(buf[1].c_str()),
|
node_info.weight = atof(buf[1].c_str());
|
||||||
buf[2]);
|
node_info.tag = buf[2];
|
||||||
static_node_infos_.push_back(node_info);
|
static_node_infos_.push_back(node_info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
|
static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
|
||||||
return lhs.weight < rhs.weight;
|
return lhs.weight < rhs.weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetStaticWordWeights(UserWordWeightOption option) {
|
void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
|
||||||
XCHECK(!static_node_infos_.empty());
|
XCHECK(!static_node_infos_.empty());
|
||||||
vector<DictUnit> x = static_node_infos_;
|
vector<DatElement> x = static_node_infos_;
|
||||||
sort(x.begin(), x.end(), WeightCompare);
|
sort(x.begin(), x.end(), WeightCompare);
|
||||||
min_weight_ = x[0].weight;
|
if(x.empty()){
|
||||||
max_weight_ = x[x.size() - 1].weight;
|
return;
|
||||||
median_weight_ = x[x.size() / 2].weight;
|
}
|
||||||
switch(option) {
|
min_weight = x[0].weight;
|
||||||
case WordWeightMin:
|
const double max_weight_ = x[x.size() - 1].weight;
|
||||||
user_word_default_weight_ = min_weight_;
|
const double median_weight_ = x[x.size() / 2].weight;
|
||||||
break;
|
|
||||||
case WordWeightMedian:
|
switch (option) {
|
||||||
user_word_default_weight_ = median_weight_;
|
case WordWeightMin:
|
||||||
break;
|
user_word_default_weight_ = min_weight;
|
||||||
default:
|
break;
|
||||||
user_word_default_weight_ = max_weight_;
|
|
||||||
break;
|
case WordWeightMedian:
|
||||||
|
user_word_default_weight_ = median_weight_;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
user_word_default_weight_ = max_weight_;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double CalcFreqSum(const vector<DictUnit>& node_infos) const {
|
double CalcFreqSum(const vector<DatElement>& node_infos) const {
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
for(size_t i = 0; i < node_infos.size(); i++) {
|
|
||||||
|
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||||
sum += node_infos[i].weight;
|
sum += node_infos[i].weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
|
void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
|
||||||
assert(sum > 0.0);
|
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||||
for(size_t i = 0; i < node_infos.size(); i++) {
|
DatElement& node_info = node_infos[i];
|
||||||
DictUnit& node_info = node_infos[i];
|
|
||||||
assert(node_info.weight > 0.0);
|
assert(node_info.weight > 0.0);
|
||||||
node_info.weight = log(double(node_info.weight) / sum);
|
node_info.weight = log(double(node_info.weight) / sum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shrink(vector<DictUnit>& units) const {
|
private:
|
||||||
vector<DictUnit>(units.begin(), units.end()).swap(units);
|
vector<DatElement> static_node_infos_;
|
||||||
}
|
size_t total_dict_size_ = 0;
|
||||||
|
DatTrie dat_;
|
||||||
vector<DictUnit> static_node_infos_;
|
|
||||||
deque<DictUnit> active_node_infos_; // must not be vector
|
|
||||||
Trie * trie_;
|
|
||||||
|
|
||||||
double freq_sum_;
|
double freq_sum_;
|
||||||
double min_weight_;
|
|
||||||
double max_weight_;
|
|
||||||
double median_weight_;
|
|
||||||
double user_word_default_weight_;
|
double user_word_default_weight_;
|
||||||
unordered_set<Rune> user_dict_single_chinese_word_;
|
unordered_set<Rune> user_dict_single_chinese_word_;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_FULLSEGMENT_H
|
|
||||||
#define CPPJIEBA_FULLSEGMENT_H
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -30,82 +11,48 @@
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
class FullSegment: public SegmentBase {
|
class FullSegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
FullSegment(const string& dictPath) {
|
|
||||||
dictTrie_ = new DictTrie(dictPath);
|
|
||||||
isNeedDestroy_ = true;
|
|
||||||
}
|
|
||||||
FullSegment(const DictTrie* dictTrie)
|
FullSegment(const DictTrie* dictTrie)
|
||||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
: dictTrie_(dictTrie) {
|
||||||
assert(dictTrie_);
|
assert(dictTrie_);
|
||||||
}
|
}
|
||||||
~FullSegment() {
|
~FullSegment() { }
|
||||||
if(isNeedDestroy_) {
|
|
||||||
delete dictTrie_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<string>& words) const {
|
|
||||||
vector<Word> tmp;
|
|
||||||
Cut(sentence, tmp);
|
|
||||||
GetStringsFromWords(tmp, words);
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<Word>& words) const {
|
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
|
||||||
PreFilter::Range range;
|
|
||||||
vector<WordRange> wrs;
|
|
||||||
wrs.reserve(sentence.size() / 2);
|
|
||||||
while(pre_filter.HasNext()) {
|
|
||||||
range = pre_filter.Next();
|
|
||||||
Cut(range.begin, range.end, wrs);
|
|
||||||
}
|
|
||||||
words.clear();
|
|
||||||
words.reserve(wrs.size());
|
|
||||||
GetWordsFromWordRanges(sentence, wrs, words);
|
|
||||||
}
|
|
||||||
void Cut(RuneStrArray::const_iterator begin,
|
|
||||||
RuneStrArray::const_iterator end,
|
|
||||||
vector<WordRange>& res) const {
|
|
||||||
// result of searching in trie tree
|
|
||||||
LocalVector<pair<size_t, const DictUnit*> > tRes;
|
|
||||||
|
|
||||||
// max index of res's words
|
virtual void Cut(RuneStrArray::const_iterator begin,
|
||||||
size_t maxIdx = 0;
|
RuneStrArray::const_iterator end,
|
||||||
|
vector<WordRange>& res, bool, size_t) const override {
|
||||||
// always equals to (uItr - begin)
|
|
||||||
size_t uIdx = 0;
|
|
||||||
|
|
||||||
// tmp variables
|
|
||||||
size_t wordLen = 0;
|
|
||||||
assert(dictTrie_);
|
assert(dictTrie_);
|
||||||
vector<struct Dag> dags;
|
vector<struct DatDag> dags;
|
||||||
dictTrie_->Find(begin, end, dags);
|
dictTrie_->Find(begin, end, dags);
|
||||||
for(size_t i = 0; i < dags.size(); i++) {
|
size_t max_word_end_pos = 0;
|
||||||
for(size_t j = 0; j < dags[i].nexts.size(); j++) {
|
|
||||||
size_t nextoffset = dags[i].nexts[j].first;
|
for (size_t i = 0; i < dags.size(); i++) {
|
||||||
|
for (const auto & kv : dags[i].nexts) {
|
||||||
|
const size_t nextoffset = kv.first - 1;
|
||||||
assert(nextoffset < dags.size());
|
assert(nextoffset < dags.size());
|
||||||
const DictUnit* du = dags[i].nexts[j].second;
|
const auto wordLen = nextoffset - i + 1;
|
||||||
if(du == NULL) {
|
const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i));
|
||||||
if(dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary
|
||||||
WordRange wr(begin + i, begin + nextoffset);
|
|
||||||
res.push_back(wr);
|
if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) {
|
||||||
}
|
WordRange wr(begin + i, begin + nextoffset);
|
||||||
} else {
|
res.push_back(wr);
|
||||||
wordLen = du->word.size();
|
|
||||||
if(wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
|
||||||
WordRange wr(begin + i, begin + nextoffset);
|
|
||||||
res.push_back(wr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
|
|
||||||
|
max_word_end_pos = max(max_word_end_pos, nextoffset + 1);
|
||||||
}
|
}
|
||||||
uIdx++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
const DictTrie* dictTrie_;
|
const DictTrie* dictTrie_;
|
||||||
bool isNeedDestroy_;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,26 +1,6 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_HMMMODEL_H
|
|
||||||
#define CPPJIEBA_HMMMODEL_H
|
|
||||||
|
|
||||||
#include "limonp/StringUtil.hpp"
|
#include "limonp/StringUtil.hpp"
|
||||||
#include "Trie.hpp"
|
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
|
@ -59,16 +39,18 @@ struct HMMModel {
|
||||||
XCHECK(GetLine(ifile, line));
|
XCHECK(GetLine(ifile, line));
|
||||||
Split(line, tmp, " ");
|
Split(line, tmp, " ");
|
||||||
XCHECK(tmp.size() == STATUS_SUM);
|
XCHECK(tmp.size() == STATUS_SUM);
|
||||||
for(size_t j = 0; j < tmp.size(); j++) {
|
|
||||||
|
for (size_t j = 0; j < tmp.size(); j++) {
|
||||||
startProb[j] = atof(tmp[j].c_str());
|
startProb[j] = atof(tmp[j].c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
//Load transProb
|
//Load transProb
|
||||||
for(size_t i = 0; i < STATUS_SUM; i++) {
|
for (size_t i = 0; i < STATUS_SUM; i++) {
|
||||||
XCHECK(GetLine(ifile, line));
|
XCHECK(GetLine(ifile, line));
|
||||||
Split(line, tmp, " ");
|
Split(line, tmp, " ");
|
||||||
XCHECK(tmp.size() == STATUS_SUM);
|
XCHECK(tmp.size() == STATUS_SUM);
|
||||||
for(size_t j = 0; j < STATUS_SUM; j++) {
|
|
||||||
|
for (size_t j = 0; j < tmp.size(); j++) {
|
||||||
transProb[i][j] = atof(tmp[j].c_str());
|
transProb[i][j] = atof(tmp[j].c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -92,43 +74,55 @@ struct HMMModel {
|
||||||
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
|
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
|
||||||
double defVal)const {
|
double defVal)const {
|
||||||
EmitProbMap::const_iterator cit = ptMp->find(key);
|
EmitProbMap::const_iterator cit = ptMp->find(key);
|
||||||
if(cit == ptMp->end()) {
|
|
||||||
|
if (cit == ptMp->end()) {
|
||||||
return defVal;
|
return defVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
return cit->second;
|
return cit->second;
|
||||||
}
|
}
|
||||||
bool GetLine(ifstream& ifile, string& line) {
|
bool GetLine(ifstream& ifile, string& line) {
|
||||||
while(getline(ifile, line)) {
|
while (getline(ifile, line)) {
|
||||||
Trim(line);
|
Trim(line);
|
||||||
if(line.empty()) {
|
|
||||||
|
if (line.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(StartsWith(line, "#")) {
|
|
||||||
|
if (StartsWith(line, "#")) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
|
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
|
||||||
if(line.empty()) {
|
if (line.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> tmp, tmp2;
|
vector<string> tmp, tmp2;
|
||||||
Unicode unicode;
|
RuneArray unicode;
|
||||||
Split(line, tmp, ",");
|
Split(line, tmp, ",");
|
||||||
for(size_t i = 0; i < tmp.size(); i++) {
|
|
||||||
|
for (size_t i = 0; i < tmp.size(); i++) {
|
||||||
Split(tmp[i], tmp2, ":");
|
Split(tmp[i], tmp2, ":");
|
||||||
if(2 != tmp2.size()) {
|
|
||||||
|
if (2 != tmp2.size()) {
|
||||||
XLOG(ERROR) << "emitProb illegal.";
|
XLOG(ERROR) << "emitProb illegal.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if(!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
|
|
||||||
|
if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
|
||||||
XLOG(ERROR) << "TransCode failed.";
|
XLOG(ERROR) << "TransCode failed.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
mp[unicode[0]] = atof(tmp2[1].c_str());
|
mp[unicode[0]] = atof(tmp2[1].c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,4 +138,3 @@ struct HMMModel {
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIBEA_HMMSEGMENT_H
|
|
||||||
#define CPPJIBEA_HMMSEGMENT_H
|
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
@ -29,58 +10,40 @@
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
class HMMSegment: public SegmentBase {
|
class HMMSegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
HMMSegment(const string& filePath)
|
|
||||||
: model_(new HMMModel(filePath)), isNeedDestroy_(true) {
|
|
||||||
}
|
|
||||||
HMMSegment(const HMMModel* model)
|
HMMSegment(const HMMModel* model)
|
||||||
: model_(model), isNeedDestroy_(false) {
|
: model_(model) {
|
||||||
}
|
|
||||||
~HMMSegment() {
|
|
||||||
if(isNeedDestroy_) {
|
|
||||||
delete model_;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
~HMMSegment() { }
|
||||||
|
|
||||||
void Cut(const string& sentence,
|
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
|
||||||
vector<string>& words) const {
|
size_t) const override {
|
||||||
vector<Word> tmp;
|
|
||||||
Cut(sentence, tmp);
|
|
||||||
GetStringsFromWords(tmp, words);
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<Word>& words) const {
|
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
|
||||||
PreFilter::Range range;
|
|
||||||
vector<WordRange> wrs;
|
|
||||||
wrs.reserve(sentence.size() / 2);
|
|
||||||
while(pre_filter.HasNext()) {
|
|
||||||
range = pre_filter.Next();
|
|
||||||
Cut(range.begin, range.end, wrs);
|
|
||||||
}
|
|
||||||
words.clear();
|
|
||||||
words.reserve(wrs.size());
|
|
||||||
GetWordsFromWordRanges(sentence, wrs, words);
|
|
||||||
}
|
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
|
||||||
RuneStrArray::const_iterator left = begin;
|
RuneStrArray::const_iterator left = begin;
|
||||||
RuneStrArray::const_iterator right = begin;
|
RuneStrArray::const_iterator right = begin;
|
||||||
while(right != end) {
|
|
||||||
if(right->rune < 0x80) {
|
while (right != end) {
|
||||||
if(left != right) {
|
if (right->rune < 0x80) { //asc码
|
||||||
|
if (left != right) {
|
||||||
InternalCut(left, right, res);
|
InternalCut(left, right, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
left = right;
|
left = right;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
right = SequentialLetterRule(left, end);
|
right = SequentialLetterRule(left, end);//非英文字符则返回left,否则返回left后非英文字母的位置
|
||||||
if(right != left) {
|
|
||||||
|
if (right != left) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
right = NumbersRule(left, end);
|
|
||||||
if(right != left) {
|
right = NumbersRule(left, end);//非数字则返回left,否则返回left后非数字的位置
|
||||||
|
|
||||||
|
if (right != left) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
right ++;
|
right ++;
|
||||||
} while(false);
|
} while (false);
|
||||||
|
|
||||||
WordRange wr(left, right - 1);
|
WordRange wr(left, right - 1);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
left = right;
|
left = right;
|
||||||
|
@ -88,45 +51,64 @@ public:
|
||||||
right++;
|
right++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(left != right) {
|
|
||||||
|
if (left != right) {
|
||||||
InternalCut(left, right, res);
|
InternalCut(left, right, res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
// sequential letters rule
|
// sequential letters rule
|
||||||
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
|
||||||
|
RuneStrArray::const_iterator end) const {
|
||||||
Rune x = begin->rune;
|
Rune x = begin->rune;
|
||||||
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
|
|
||||||
|
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
|
||||||
begin ++;
|
begin ++;
|
||||||
} else {
|
} else {
|
||||||
return begin;
|
return begin;
|
||||||
}
|
}
|
||||||
while(begin != end) {
|
|
||||||
|
while (begin != end) {
|
||||||
x = begin->rune;
|
x = begin->rune;
|
||||||
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
|
|
||||||
|
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
|
||||||
begin ++;
|
begin ++;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return begin;
|
return begin;
|
||||||
}
|
}
|
||||||
//
|
//
|
||||||
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
||||||
Rune x = begin->rune;
|
Rune x = begin->rune;
|
||||||
if('0' <= x && x <= '9') {
|
|
||||||
|
if ('0' <= x && x <= '9') {
|
||||||
begin ++;
|
begin ++;
|
||||||
} else {
|
} else {
|
||||||
return begin;
|
return begin;
|
||||||
}
|
}
|
||||||
while(begin != end) {
|
|
||||||
|
while (begin != end) {
|
||||||
x = begin->rune;
|
x = begin->rune;
|
||||||
if(('0' <= x && x <= '9') || x == '.') {
|
|
||||||
|
if (('0' <= x && x <= '9') || x == '.') {
|
||||||
begin++;
|
begin++;
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return begin;
|
return begin;
|
||||||
}
|
}
|
||||||
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
||||||
|
@ -135,8 +117,9 @@ private:
|
||||||
|
|
||||||
RuneStrArray::const_iterator left = begin;
|
RuneStrArray::const_iterator left = begin;
|
||||||
RuneStrArray::const_iterator right;
|
RuneStrArray::const_iterator right;
|
||||||
for(size_t i = 0; i < status.size(); i++) {
|
|
||||||
if(status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
for (size_t i = 0; i < status.size(); i++) {
|
||||||
|
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
||||||
right = begin + i + 1;
|
right = begin + i + 1;
|
||||||
WordRange wr(left, right - 1);
|
WordRange wr(left, right - 1);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
|
@ -155,27 +138,31 @@ private:
|
||||||
size_t now, old, stat;
|
size_t now, old, stat;
|
||||||
double tmp, endE, endS;
|
double tmp, endE, endS;
|
||||||
|
|
||||||
vector<int> path(XYSize);
|
//vector<int> path(XYSize);
|
||||||
vector<double> weight(XYSize);
|
//vector<double> weight(XYSize);
|
||||||
|
int path[XYSize];
|
||||||
|
double weight[XYSize];
|
||||||
|
|
||||||
//start
|
//start
|
||||||
for(size_t y = 0; y < Y; y++) {
|
for (size_t y = 0; y < Y; y++) {
|
||||||
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
|
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
|
||||||
path[0 + y * X] = -1;
|
path[0 + y * X] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
double emitProb;
|
double emitProb;
|
||||||
|
|
||||||
for(size_t x = 1; x < X; x++) {
|
for (size_t x = 1; x < X; x++) {
|
||||||
for(size_t y = 0; y < Y; y++) {
|
for (size_t y = 0; y < Y; y++) {
|
||||||
now = x + y * X;
|
now = x + y * X;
|
||||||
weight[now] = MIN_DOUBLE;
|
weight[now] = MIN_DOUBLE;
|
||||||
path[now] = HMMModel::E; // warning
|
path[now] = HMMModel::E; // warning
|
||||||
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
|
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
|
||||||
for(size_t preY = 0; preY < Y; preY++) {
|
|
||||||
|
for (size_t preY = 0; preY < Y; preY++) {
|
||||||
old = x - 1 + preY * X;
|
old = x - 1 + preY * X;
|
||||||
tmp = weight[old] + model_->transProb[preY][y] + emitProb;
|
tmp = weight[old] + model_->transProb[preY][y] + emitProb;
|
||||||
if(tmp > weight[now]) {
|
|
||||||
|
if (tmp > weight[now]) {
|
||||||
weight[now] = tmp;
|
weight[now] = tmp;
|
||||||
path[now] = preY;
|
path[now] = preY;
|
||||||
}
|
}
|
||||||
|
@ -186,23 +173,23 @@ private:
|
||||||
endE = weight[X - 1 + HMMModel::E * X];
|
endE = weight[X - 1 + HMMModel::E * X];
|
||||||
endS = weight[X - 1 + HMMModel::S * X];
|
endS = weight[X - 1 + HMMModel::S * X];
|
||||||
stat = 0;
|
stat = 0;
|
||||||
if(endE >= endS) {
|
|
||||||
|
if (endE >= endS) {
|
||||||
stat = HMMModel::E;
|
stat = HMMModel::E;
|
||||||
} else {
|
} else {
|
||||||
stat = HMMModel::S;
|
stat = HMMModel::S;
|
||||||
}
|
}
|
||||||
|
|
||||||
status.resize(X);
|
status.resize(X);
|
||||||
for(int x = X - 1 ; x >= 0; x--) {
|
|
||||||
|
for (int x = X - 1 ; x >= 0; x--) {
|
||||||
status[x] = stat;
|
status[x] = stat;
|
||||||
stat = path[x + stat * X];
|
stat = path[x + stat * X];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const HMMModel* model_;
|
const HMMModel* model_;
|
||||||
bool isNeedDestroy_;
|
|
||||||
}; // class HMMSegment
|
}; // class HMMSegment
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -0,0 +1,134 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <cmath>
|
||||||
|
#include <limits>
|
||||||
|
#include "limonp/StringUtil.hpp"
|
||||||
|
#include "limonp/Logging.hpp"
|
||||||
|
#include "Unicode.hpp"
|
||||||
|
#include "DatTrie.hpp"
|
||||||
|
#include <QDebug>
|
||||||
|
namespace cppjieba {
|
||||||
|
|
||||||
|
using namespace limonp;
|
||||||
|
|
||||||
|
const size_t IDF_COLUMN_NUM = 2;
|
||||||
|
|
||||||
|
class IdfTrie {
|
||||||
|
public:
|
||||||
|
enum UserWordWeightOption {
|
||||||
|
WordWeightMin,
|
||||||
|
WordWeightMedian,
|
||||||
|
WordWeightMax,
|
||||||
|
}; // enum UserWordWeightOption
|
||||||
|
|
||||||
|
IdfTrie(const string& dict_path, const string & dat_cache_path = "",
|
||||||
|
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||||
|
Init(dict_path, dat_cache_path, user_word_weight_opt);
|
||||||
|
}
|
||||||
|
|
||||||
|
~IdfTrie() {}
|
||||||
|
|
||||||
|
double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
|
||||||
|
return dat_.Find(word, length, node_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Find(RuneStrArray::const_iterator begin,
|
||||||
|
RuneStrArray::const_iterator end,
|
||||||
|
vector<struct DatDag>&res,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
dat_.Find(begin, end, res, max_word_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||||
|
return IsIn(user_dict_single_chinese_word_, word);
|
||||||
|
}
|
||||||
|
|
||||||
|
double GetMinWeight() const {
|
||||||
|
return dat_.GetMinWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetTotalDictSize() const {
|
||||||
|
return total_dict_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Init(const string& dict_path, string dat_cache_path,
|
||||||
|
UserWordWeightOption user_word_weight_opt) {
|
||||||
|
size_t file_size_sum = 0;
|
||||||
|
const string md5 = CalcFileListMD5(dict_path, file_size_sum);
|
||||||
|
|
||||||
|
if (dat_cache_path.empty()) {
|
||||||
|
//未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
|
||||||
|
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
||||||
|
}
|
||||||
|
QString path = QString::fromStdString(dat_cache_path);
|
||||||
|
qDebug() << "#########Idf path:" << path;
|
||||||
|
if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
|
||||||
|
total_dict_size_ = file_size_sum;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LoadDefaultIdf(dict_path);
|
||||||
|
double idf_sum_ = CalcIdfSum(static_node_infos_);
|
||||||
|
assert(static_node_infos_.size());
|
||||||
|
idfAverage_ = idf_sum_ / static_node_infos_.size();
|
||||||
|
assert(idfAverage_ > 0.0);
|
||||||
|
double min_weight = 0;
|
||||||
|
dat_.SetMinWeight(min_weight);
|
||||||
|
|
||||||
|
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
|
||||||
|
assert(build_ret);
|
||||||
|
total_dict_size_ = file_size_sum;
|
||||||
|
vector<IdfElement>().swap(static_node_infos_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LoadDefaultIdf(const string& filePath) {
|
||||||
|
ifstream ifs(filePath.c_str());
|
||||||
|
if(not ifs.is_open()){
|
||||||
|
return ;
|
||||||
|
}
|
||||||
|
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
||||||
|
string line;
|
||||||
|
vector<string> buf;
|
||||||
|
size_t lineno = 0;
|
||||||
|
|
||||||
|
for (; getline(ifs, line); lineno++) {
|
||||||
|
if (line.empty()) {
|
||||||
|
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Split(line, buf, " ");
|
||||||
|
XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
|
||||||
|
IdfElement node_info;
|
||||||
|
node_info.word = buf[0];
|
||||||
|
node_info.idf = atof(buf[1].c_str());
|
||||||
|
static_node_infos_.push_back(node_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double CalcIdfSum(const vector<IdfElement>& node_infos) const {
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||||
|
sum += node_infos[i].idf;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
double idfAverage_;
|
||||||
|
private:
|
||||||
|
vector<IdfElement> static_node_infos_;
|
||||||
|
size_t total_dict_size_ = 0;
|
||||||
|
DatTrie dat_;
|
||||||
|
unordered_set<Rune> user_dict_single_chinese_word_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
@ -1,24 +1,6 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEAB_JIEBA_H
|
|
||||||
#define CPPJIEAB_JIEBA_H
|
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include "QuerySegment.hpp"
|
#include "QuerySegment.hpp"
|
||||||
#include "KeywordExtractor.hpp"
|
#include "KeywordExtractor.hpp"
|
||||||
|
|
||||||
|
@ -29,56 +11,48 @@ public:
|
||||||
Jieba(const string& dict_path,
|
Jieba(const string& dict_path,
|
||||||
const string& model_path,
|
const string& model_path,
|
||||||
const string& user_dict_path,
|
const string& user_dict_path,
|
||||||
const string& idfPath,
|
const string& idfPath = "",
|
||||||
const string& stopWordPath)
|
const string& stopWordPath = "",
|
||||||
: dict_trie_(dict_path, user_dict_path),
|
const string& dat_cache_path = "")
|
||||||
|
: dict_trie_(dict_path, user_dict_path, dat_cache_path),
|
||||||
model_(model_path),
|
model_(model_path),
|
||||||
mp_seg_(&dict_trie_),
|
mp_seg_(&dict_trie_),
|
||||||
hmm_seg_(&model_),
|
hmm_seg_(&model_),
|
||||||
mix_seg_(&dict_trie_, &model_),
|
mix_seg_(&dict_trie_, &model_, stopWordPath),
|
||||||
full_seg_(&dict_trie_),
|
full_seg_(&dict_trie_),
|
||||||
query_seg_(&dict_trie_, &model_),
|
query_seg_(&dict_trie_, &model_, stopWordPath),
|
||||||
extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
|
extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
|
||||||
|
~Jieba() { }
|
||||||
}
|
|
||||||
~Jieba() {
|
|
||||||
}
|
|
||||||
|
|
||||||
struct LocWord {
|
|
||||||
string word;
|
|
||||||
size_t begin;
|
|
||||||
size_t end;
|
|
||||||
}; // struct LocWord
|
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
mix_seg_.Cut(sentence, words, hmm);
|
mix_seg_.CutToStr(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
mix_seg_.Cut(sentence, words, hmm);
|
mix_seg_.CutToWord(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
void CutAll(const string& sentence, vector<string>& words) const {
|
void CutAll(const string& sentence, vector<string>& words) const {
|
||||||
full_seg_.Cut(sentence, words);
|
full_seg_.CutToStr(sentence, words);
|
||||||
}
|
}
|
||||||
void CutAll(const string& sentence, vector<Word>& words) const {
|
void CutAll(const string& sentence, vector<Word>& words) const {
|
||||||
full_seg_.Cut(sentence, words);
|
full_seg_.CutToWord(sentence, words);
|
||||||
}
|
}
|
||||||
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
query_seg_.Cut(sentence, words, hmm);
|
query_seg_.CutToStr(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
query_seg_.Cut(sentence, words, hmm);
|
query_seg_.CutToWord(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
void CutHMM(const string& sentence, vector<string>& words) const {
|
void CutHMM(const string& sentence, vector<string>& words) const {
|
||||||
hmm_seg_.Cut(sentence, words);
|
hmm_seg_.CutToStr(sentence, words);
|
||||||
}
|
}
|
||||||
void CutHMM(const string& sentence, vector<Word>& words) const {
|
void CutHMM(const string& sentence, vector<Word>& words) const {
|
||||||
hmm_seg_.Cut(sentence, words);
|
hmm_seg_.CutToWord(sentence, words);
|
||||||
}
|
}
|
||||||
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
||||||
mp_seg_.Cut(sentence, words, max_word_len);
|
mp_seg_.CutToStr(sentence, words, false, max_word_len);
|
||||||
}
|
}
|
||||||
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
|
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
|
||||||
mp_seg_.Cut(sentence, words, max_word_len);
|
mp_seg_.CutToWord(sentence, words, false, max_word_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
||||||
|
@ -87,16 +61,8 @@ public:
|
||||||
string LookupTag(const string &str) const {
|
string LookupTag(const string &str) const {
|
||||||
return mix_seg_.LookupTag(str);
|
return mix_seg_.LookupTag(str);
|
||||||
}
|
}
|
||||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
|
||||||
return dict_trie_.InsertUserWord(word, tag);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
|
|
||||||
return dict_trie_.InsertUserWord(word, freq, tag);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Find(const string& word) {
|
bool Find(const string& word) {
|
||||||
return dict_trie_.Find(word);
|
return nullptr != dict_trie_.Find(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResetSeparators(const string& s) {
|
void ResetSeparators(const string& s) {
|
||||||
|
@ -116,18 +82,6 @@ public:
|
||||||
return &model_;
|
return &model_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoadUserDict(const vector<string>& buf) {
|
|
||||||
dict_trie_.LoadUserDict(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadUserDict(const set<string>& buf) {
|
|
||||||
dict_trie_.LoadUserDict(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadUserDict(const string& path) {
|
|
||||||
dict_trie_.LoadUserDict(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DictTrie dict_trie_;
|
DictTrie dict_trie_;
|
||||||
HMMModel model_;
|
HMMModel model_;
|
||||||
|
@ -145,4 +99,3 @@ public:
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif // CPPJIEAB_JIEBA_H
|
|
||||||
|
|
|
@ -1,27 +1,8 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
|
||||||
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <set>
|
|
||||||
#include "MixSegment.hpp"
|
#include "MixSegment.hpp"
|
||||||
|
#include "IdfTrie.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
|
@ -31,141 +12,87 @@ using namespace std;
|
||||||
/*utf8*/
|
/*utf8*/
|
||||||
class KeywordExtractor {
|
class KeywordExtractor {
|
||||||
public:
|
public:
|
||||||
struct Word {
|
|
||||||
string word;
|
|
||||||
vector<size_t> offsets;
|
|
||||||
double weight;
|
|
||||||
}; // struct Word
|
|
||||||
|
|
||||||
KeywordExtractor(const string& dictPath,
|
|
||||||
const string& hmmFilePath,
|
|
||||||
const string& idfPath,
|
|
||||||
const string& stopWordPath,
|
|
||||||
const string& userDict = "")
|
|
||||||
: segment_(dictPath, hmmFilePath, userDict) {
|
|
||||||
LoadIdfDict(idfPath);
|
|
||||||
LoadStopWordDict(stopWordPath);
|
|
||||||
}
|
|
||||||
KeywordExtractor(const DictTrie* dictTrie,
|
KeywordExtractor(const DictTrie* dictTrie,
|
||||||
const HMMModel* model,
|
const HMMModel* model,
|
||||||
const string& idfPath,
|
const string& idfPath,
|
||||||
|
const string& dat_cache_path,
|
||||||
const string& stopWordPath)
|
const string& stopWordPath)
|
||||||
: segment_(dictTrie, model) {
|
: segment_(dictTrie, model, stopWordPath),
|
||||||
LoadIdfDict(idfPath);
|
idf_trie_(idfPath,dat_cache_path){
|
||||||
LoadStopWordDict(stopWordPath);
|
|
||||||
}
|
}
|
||||||
~KeywordExtractor() {
|
~KeywordExtractor() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||||
vector<Word> topWords;
|
vector<KeyWord> topWords;
|
||||||
Extract(sentence, topWords, topN);
|
Extract(sentence, topWords, topN);
|
||||||
for(size_t i = 0; i < topWords.size(); i++) {
|
|
||||||
|
for (size_t i = 0; i < topWords.size(); i++) {
|
||||||
keywords.push_back(topWords[i].word);
|
keywords.push_back(topWords[i].word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||||
vector<Word> topWords;
|
vector<KeyWord> topWords;
|
||||||
Extract(sentence, topWords, topN);
|
Extract(sentence, topWords, topN);
|
||||||
for(size_t i = 0; i < topWords.size(); i++) {
|
|
||||||
|
for (size_t i = 0; i < topWords.size(); i++) {
|
||||||
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
|
void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
|
||||||
vector<string> words;
|
|
||||||
segment_.Cut(sentence, words);
|
|
||||||
|
|
||||||
map<string, Word> wordmap;
|
unordered_map<string, KeyWord> wordmap;//插入字符串与Word的map,相同string统计词频叠加权重
|
||||||
size_t offset = 0;
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
for(size_t i = 0; i < words.size(); ++i) {
|
RuneStrArray::const_iterator null_p;
|
||||||
size_t t = offset;
|
WordRange range(null_p, null_p);
|
||||||
offset += words[i].size();
|
bool isNull(false);
|
||||||
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
while (pre_filter.Next(range, isNull)) {
|
||||||
|
if (isNull) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
wordmap[words[i]].offsets.push_back(t);
|
segment_.CutToStr(sentence, range, wordmap);
|
||||||
wordmap[words[i]].weight += 1.0;
|
|
||||||
}
|
|
||||||
if(offset != sentence.size()) {
|
|
||||||
XLOG(ERROR) << "words illegal";
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
keywords.clear();
|
keywords.clear();
|
||||||
keywords.reserve(wordmap.size());
|
keywords.reserve(wordmap.size());
|
||||||
for(map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
|
||||||
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
|
for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||||
if(cit != idfMap_.end()) {
|
double idf = idf_trie_.Find(itr->first);
|
||||||
itr->second.weight *= cit->second;
|
if (-1 != idf) {//IDF词典查找
|
||||||
|
itr->second.weight *= idf;
|
||||||
} else {
|
} else {
|
||||||
itr->second.weight *= idfAverage_;
|
itr->second.weight *= idf_trie_.idfAverage_;
|
||||||
}
|
}
|
||||||
|
|
||||||
itr->second.word = itr->first;
|
itr->second.word = itr->first;
|
||||||
keywords.push_back(itr->second);
|
keywords.push_back(itr->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
topN = min(topN, keywords.size());
|
topN = min(topN, keywords.size());
|
||||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||||
keywords.resize(topN);
|
keywords.resize(topN);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void LoadIdfDict(const string& idfPath) {
|
|
||||||
ifstream ifs(idfPath.c_str());
|
|
||||||
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
|
|
||||||
string line ;
|
|
||||||
vector<string> buf;
|
|
||||||
double idf = 0.0;
|
|
||||||
double idfSum = 0.0;
|
|
||||||
size_t lineno = 0;
|
|
||||||
for(; getline(ifs, line); lineno++) {
|
|
||||||
buf.clear();
|
|
||||||
if(line.empty()) {
|
|
||||||
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Split(line, buf, " ");
|
|
||||||
if(buf.size() != 2) {
|
|
||||||
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
idf = atof(buf[1].c_str());
|
|
||||||
idfMap_[buf[0]] = idf;
|
|
||||||
idfSum += idf;
|
|
||||||
|
|
||||||
}
|
static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
|
||||||
|
|
||||||
assert(lineno);
|
|
||||||
idfAverage_ = idfSum / lineno;
|
|
||||||
assert(idfAverage_ > 0.0);
|
|
||||||
}
|
|
||||||
void LoadStopWordDict(const string& filePath) {
|
|
||||||
ifstream ifs(filePath.c_str());
|
|
||||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
|
||||||
string line ;
|
|
||||||
while(getline(ifs, line)) {
|
|
||||||
stopWords_.insert(line);
|
|
||||||
}
|
|
||||||
assert(stopWords_.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool Compare(const Word& lhs, const Word& rhs) {
|
|
||||||
return lhs.weight > rhs.weight;
|
return lhs.weight > rhs.weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
MixSegment segment_;
|
MixSegment segment_;
|
||||||
unordered_map<string, double> idfMap_;
|
IdfTrie idf_trie_;
|
||||||
double idfAverage_;
|
|
||||||
|
|
||||||
unordered_set<string> stopWords_;
|
unordered_set<Rune> symbols_;
|
||||||
}; // class KeywordExtractor
|
}; // class KeywordExtractor
|
||||||
|
|
||||||
inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
|
inline ostream& operator << (ostream& os, const KeyWord& word) {
|
||||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
|
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
|
||||||
|
"}";
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_MPSEGMENT_H
|
|
||||||
#define CPPJIEBA_MPSEGMENT_H
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -31,63 +12,36 @@ namespace cppjieba {
|
||||||
|
|
||||||
class MPSegment: public SegmentTagged {
|
class MPSegment: public SegmentTagged {
|
||||||
public:
|
public:
|
||||||
MPSegment(const string& dictPath, const string& userDictPath = "")
|
|
||||||
: dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
|
|
||||||
}
|
|
||||||
MPSegment(const DictTrie* dictTrie)
|
MPSegment(const DictTrie* dictTrie)
|
||||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
: dictTrie_(dictTrie) {
|
||||||
assert(dictTrie_);
|
assert(dictTrie_);
|
||||||
}
|
}
|
||||||
~MPSegment() {
|
~MPSegment() { }
|
||||||
if(isNeedDestroy_) {
|
|
||||||
delete dictTrie_;
|
virtual void Cut(RuneStrArray::const_iterator begin,
|
||||||
}
|
RuneStrArray::const_iterator end,
|
||||||
|
vector<WordRange>& words,
|
||||||
|
bool, size_t max_word_len) const override {
|
||||||
|
// vector<DatDag> dags;
|
||||||
|
// dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
|
||||||
|
// CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx
|
||||||
|
// CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
|
||||||
|
dictTrie_->Find(begin, end, words, max_word_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words) const {
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
Cut(sentence, words, MAX_WORD_LENGTH);
|
size_t) const override {
|
||||||
}
|
|
||||||
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<string>& words,
|
|
||||||
size_t max_word_len) const {
|
|
||||||
vector<Word> tmp;
|
|
||||||
Cut(sentence, tmp, max_word_len);
|
|
||||||
GetStringsFromWords(tmp, words);
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<Word>& words,
|
|
||||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
|
||||||
PreFilter::Range range;
|
|
||||||
vector<WordRange> wrs;
|
|
||||||
wrs.reserve(sentence.size() / 2);
|
|
||||||
while(pre_filter.HasNext()) {
|
|
||||||
range = pre_filter.Next();
|
|
||||||
Cut(range.begin, range.end, wrs, max_word_len);
|
|
||||||
}
|
|
||||||
words.clear();
|
|
||||||
words.reserve(wrs.size());
|
|
||||||
GetWordsFromWordRanges(sentence, wrs, words);
|
|
||||||
}
|
|
||||||
void Cut(RuneStrArray::const_iterator begin,
|
|
||||||
RuneStrArray::const_iterator end,
|
|
||||||
vector<WordRange>& words,
|
|
||||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
|
||||||
vector<Dag> dags;
|
|
||||||
dictTrie_->Find(begin,
|
|
||||||
end,
|
|
||||||
dags,
|
|
||||||
max_word_len);
|
|
||||||
CalcDP(dags);
|
|
||||||
CutByDag(begin, end, dags, words);
|
|
||||||
}
|
}
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
const DictTrie* GetDictTrie() const {
|
}
|
||||||
|
const DictTrie* GetDictTrie() const override {
|
||||||
return dictTrie_;
|
return dictTrie_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Tag(const string& src, vector<pair<string, string> >& res) const {
|
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
|
||||||
return tagger_.Tag(src, res, *this);
|
return tagger_.Tag(src, res, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,61 +49,81 @@ public:
|
||||||
return dictTrie_->IsUserDictSingleChineseWord(value);
|
return dictTrie_->IsUserDictSingleChineseWord(value);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void CalcDP(vector<Dag>& dags) const {
|
/*
|
||||||
size_t nextPos;
|
void CalcDP(vector<DatDag>& dags) const {
|
||||||
const DictUnit* p;
|
double val(0);
|
||||||
double val;
|
for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
||||||
|
rit->max_next = -1;
|
||||||
|
rit->max_weight = MIN_DOUBLE;
|
||||||
|
|
||||||
for(vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
for (const auto & it : rit->nexts) {
|
||||||
rit->pInfo = NULL;
|
const auto nextPos = it.first;
|
||||||
rit->weight = MIN_DOUBLE;
|
val = dictTrie_->GetMinWeight();
|
||||||
assert(!rit->nexts.empty());
|
|
||||||
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
|
if (nullptr != it.second) {
|
||||||
nextPos = it->first;
|
val = it.second->weight;
|
||||||
p = it->second;
|
|
||||||
val = 0.0;
|
|
||||||
if(nextPos + 1 < dags.size()) {
|
|
||||||
val += dags[nextPos + 1].weight;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(p) {
|
if (nextPos < dags.size()) {
|
||||||
val += p->weight;
|
val += dags[nextPos].max_weight;
|
||||||
} else {
|
|
||||||
val += dictTrie_->GetMinWeight();
|
|
||||||
}
|
}
|
||||||
if(val > rit->weight) {
|
|
||||||
rit->pInfo = p;
|
if ((nextPos <= dags.size()) && (val > rit->max_weight)) {
|
||||||
rit->weight = val;
|
rit->max_weight = val;
|
||||||
|
rit->max_next = nextPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void CutByDag(RuneStrArray::const_iterator begin,
|
*/
|
||||||
RuneStrArray::const_iterator end,
|
/* 倒叙方式重写CalcDP函数,初步测试未发现问题*/
|
||||||
const vector<Dag>& dags,
|
void CalcDP(vector<DatDag>& dags) const {
|
||||||
vector<WordRange>& words) const {
|
double val(0);
|
||||||
size_t i = 0;
|
size_t size = dags.size();
|
||||||
while(i < dags.size()) {
|
|
||||||
const DictUnit* p = dags[i].pInfo;
|
for (size_t i = 0; i < size; i++) {
|
||||||
if(p) {
|
dags[size - 1 - i].max_next = -1;
|
||||||
assert(p->word.size() >= 1);
|
dags[size - 1 - i].max_weight = MIN_DOUBLE;
|
||||||
WordRange wr(begin + i, begin + i + p->word.size() - 1);
|
|
||||||
words.push_back(wr);
|
for (const auto & it : dags[size - 1 - i].nexts) {
|
||||||
i += p->word.size();
|
const auto nextPos = it.first;
|
||||||
} else { //single chinese word
|
val = dictTrie_->GetMinWeight();
|
||||||
WordRange wr(begin + i, begin + i);
|
|
||||||
words.push_back(wr);
|
if (nullptr != it.second) {
|
||||||
i++;
|
val = it.second->weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextPos < dags.size()) {
|
||||||
|
val += dags[nextPos].max_weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) {
|
||||||
|
dags[size - 1 - i].max_weight = val;
|
||||||
|
dags[size - 1 - i].max_next = nextPos;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CutByDag(RuneStrArray::const_iterator begin,
|
||||||
|
RuneStrArray::const_iterator,
|
||||||
|
const vector<DatDag>& dags,
|
||||||
|
vector<WordRange>& words) const {
|
||||||
|
|
||||||
|
for (size_t i = 0; i < dags.size();) {
|
||||||
|
const auto next = dags[i].max_next;
|
||||||
|
assert(next > i);
|
||||||
|
assert(next <= dags.size());
|
||||||
|
WordRange wr(begin + i, begin + next - 1);
|
||||||
|
words.push_back(wr);
|
||||||
|
i = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const DictTrie* dictTrie_;
|
const DictTrie* dictTrie_;
|
||||||
bool isNeedDestroy_;
|
|
||||||
PosTagger tagger_;
|
PosTagger tagger_;
|
||||||
|
|
||||||
}; // class MPSegment
|
}; // class MPSegment
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_MIXSEGMENT_H
|
|
||||||
#define CPPJIEBA_MIXSEGMENT_H
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include "MPSegment.hpp"
|
#include "MPSegment.hpp"
|
||||||
|
@ -28,70 +9,52 @@
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
class MixSegment: public SegmentTagged {
|
class MixSegment: public SegmentTagged {
|
||||||
public:
|
public:
|
||||||
MixSegment(const string& mpSegDict, const string& hmmSegDict,
|
MixSegment(const DictTrie* dictTrie,
|
||||||
const string& userDict = "")
|
const HMMModel* model,
|
||||||
: mpSeg_(mpSegDict, userDict),
|
const string& stopWordPath)
|
||||||
hmmSeg_(hmmSegDict) {
|
|
||||||
}
|
|
||||||
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
|
|
||||||
: mpSeg_(dictTrie), hmmSeg_(model) {
|
: mpSeg_(dictTrie), hmmSeg_(model) {
|
||||||
|
LoadStopWordDict(stopWordPath);
|
||||||
}
|
}
|
||||||
~MixSegment() {
|
~MixSegment() {}
|
||||||
}
|
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words) const {
|
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||||
Cut(sentence, words, true);
|
size_t) const override {
|
||||||
}
|
if (!hmm) {
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
|
mpSeg_.CutRuneArray(begin, end, res);
|
||||||
vector<Word> tmp;
|
|
||||||
Cut(sentence, tmp, hmm);
|
|
||||||
GetStringsFromWords(tmp, words);
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
|
||||||
PreFilter::Range range;
|
|
||||||
vector<WordRange> wrs;
|
|
||||||
wrs.reserve(sentence.size() / 2);
|
|
||||||
while(pre_filter.HasNext()) {
|
|
||||||
range = pre_filter.Next();
|
|
||||||
Cut(range.begin, range.end, wrs, hmm);
|
|
||||||
}
|
|
||||||
words.clear();
|
|
||||||
words.reserve(wrs.size());
|
|
||||||
GetWordsFromWordRanges(sentence, wrs, words);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
|
||||||
if(!hmm) {
|
|
||||||
mpSeg_.Cut(begin, end, res);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<WordRange> words;
|
vector<WordRange> words;
|
||||||
assert(end >= begin);
|
assert(end >= begin);
|
||||||
words.reserve(end - begin);
|
words.reserve(end - begin);
|
||||||
mpSeg_.Cut(begin, end, words);
|
mpSeg_.CutRuneArray(begin, end, words);
|
||||||
|
|
||||||
vector<WordRange> hmmRes;
|
vector<WordRange> hmmRes;
|
||||||
hmmRes.reserve(end - begin);
|
hmmRes.reserve(end - begin);
|
||||||
for(size_t i = 0; i < words.size(); i++) {
|
|
||||||
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
//if mp Get a word, it's ok, put it into result
|
//if mp Get a word, it's ok, put it into result
|
||||||
if(words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
|
if (words[i].left != words[i].right || (words[i].left == words[i].right &&
|
||||||
|
mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
|
||||||
res.push_back(words[i]);
|
res.push_back(words[i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if mp Get a single one and it is not in userdict, collect it in sequence
|
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||||
size_t j = i;
|
size_t j = i;
|
||||||
while(j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
|
||||||
|
while (j < words.size() && words[j].left == words[j].right &&
|
||||||
|
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cut the sequence with hmm
|
// Cut the sequence with hmm
|
||||||
assert(j - 1 >= i);
|
assert(j - 1 >= i);
|
||||||
// TODO
|
// TODO
|
||||||
hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
|
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||||
|
|
||||||
//put hmm result to result
|
//put hmm result to result
|
||||||
for(size_t k = 0; k < hmmRes.size(); k++) {
|
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||||
res.push_back(hmmRes[k]);
|
res.push_back(hmmRes[k]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,11 +66,141 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictTrie* GetDictTrie() const {
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
//目前hmm默认开启,后期如有需要关闭再修改--jxx20210519
|
||||||
|
// if (!hmm) {
|
||||||
|
// mpSeg_.CutRuneArray(begin, end, res);
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
|
||||||
|
vector<WordRange> words;
|
||||||
|
assert(end >= begin);
|
||||||
|
words.reserve(end - begin);
|
||||||
|
mpSeg_.CutRuneArray(begin, end, words);
|
||||||
|
|
||||||
|
vector<WordRange> hmmRes;
|
||||||
|
hmmRes.reserve(end - begin);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
|
//if mp Get a word, it's ok, put it into result
|
||||||
|
if (words[i].left != words[i].right) {
|
||||||
|
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|
||||||
|
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
|
||||||
|
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||||
|
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符
|
||||||
|
|
||||||
|
while (j < (words.size() - 1) && words[j].left == words[j].right &&
|
||||||
|
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cut the sequence with hmm
|
||||||
|
assert(j - 1 >= i);
|
||||||
|
// TODO
|
||||||
|
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||||
|
|
||||||
|
//put hmm result to result
|
||||||
|
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||||
|
res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
|
||||||
|
}
|
||||||
|
|
||||||
|
//clear tmp vars
|
||||||
|
hmmRes.clear();
|
||||||
|
|
||||||
|
//let i jump over this piece
|
||||||
|
i = j - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
vector<WordRange> words;
|
||||||
|
vector<WordRange> hmmRes;
|
||||||
|
assert(end >= begin);
|
||||||
|
if (3 == begin->len or 4 == begin->len) {
|
||||||
|
words.reserve(end - begin);
|
||||||
|
mpSeg_.CutRuneArray(begin, end, words);
|
||||||
|
hmmRes.reserve(words.size());
|
||||||
|
} else {
|
||||||
|
hmmRes.reserve(end - begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (words.size() != 0) {//存在中文分词结果
|
||||||
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
|
|
||||||
|
string str = GetStringFromRunes(s, words[i].left, words[i].right);
|
||||||
|
|
||||||
|
if (stopWords_.find(str) != stopWords_.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (words[i].left != words[i].right) {
|
||||||
|
res[str].offsets.push_back(words[i].left->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|
||||||
|
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
|
||||||
|
if (stopWords_.find(str) != stopWords_.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[str].offsets.push_back(words[i].left->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||||
|
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符
|
||||||
|
|
||||||
|
while (j < (words.size() - 1)
|
||||||
|
&& words[j].left == words[j].right
|
||||||
|
&& !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cut the sequence with hmm
|
||||||
|
assert(j - 1 >= i);
|
||||||
|
// TODO
|
||||||
|
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||||
|
|
||||||
|
//put hmm result to result
|
||||||
|
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||||
|
string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
|
||||||
|
if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
|
||||||
|
res[hmmStr].weight += 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//clear tmp vars
|
||||||
|
hmmRes.clear();
|
||||||
|
|
||||||
|
//let i jump over this piece
|
||||||
|
i = j - 1;
|
||||||
|
}
|
||||||
|
} else {//不存在中文分词结果
|
||||||
|
for (size_t i = 0; i < (size_t)(end - begin); i++) {
|
||||||
|
string str = s.substr((begin+i)->offset, (begin+i)->len);
|
||||||
|
res[str].offsets.push_back((begin+i)->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const DictTrie* GetDictTrie() const override {
|
||||||
return mpSeg_.GetDictTrie();
|
return mpSeg_.GetDictTrie();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Tag(const string& src, vector<pair<string, string> >& res) const {
|
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
|
||||||
return tagger_.Tag(src, res, *this);
|
return tagger_.Tag(src, res, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,7 +208,23 @@ public:
|
||||||
return tagger_.LookupTag(str, *this);
|
return tagger_.LookupTag(str, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LoadStopWordDict(const string& filePath) {
|
||||||
|
ifstream ifs(filePath.c_str());
|
||||||
|
if(not ifs.is_open()){
|
||||||
|
return ;
|
||||||
|
}
|
||||||
|
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||||
|
string line ;
|
||||||
|
|
||||||
|
while (getline(ifs, line)) {
|
||||||
|
stopWords_.insert(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(stopWords_.size());
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
|
unordered_set<string> stopWords_;
|
||||||
|
|
||||||
MPSegment mpSeg_;
|
MPSegment mpSeg_;
|
||||||
HMMSegment hmmSeg_;
|
HMMSegment hmmSeg_;
|
||||||
PosTagger tagger_;
|
PosTagger tagger_;
|
||||||
|
@ -124,4 +233,3 @@ private:
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,27 +1,8 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_POS_TAGGING_H
|
|
||||||
#define CPPJIEBA_POS_TAGGING_H
|
|
||||||
|
|
||||||
#include "limonp/StringUtil.hpp"
|
#include "limonp/StringUtil.hpp"
|
||||||
#include "SegmentTagged.hpp"
|
|
||||||
#include "DictTrie.hpp"
|
#include "DictTrie.hpp"
|
||||||
|
#include "SegmentTagged.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
using namespace limonp;
|
using namespace limonp;
|
||||||
|
@ -39,28 +20,31 @@ public:
|
||||||
|
|
||||||
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
|
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
|
||||||
vector<string> CutRes;
|
vector<string> CutRes;
|
||||||
segment.Cut(src, CutRes);
|
segment.CutToStr(src, CutRes);
|
||||||
|
|
||||||
for(vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||||
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
|
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return !res.empty();
|
return !res.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
string LookupTag(const string &str, const SegmentTagged& segment) const {
|
string LookupTag(const string &str, const SegmentTagged& segment) const {
|
||||||
const DictUnit *tmp = NULL;
|
|
||||||
RuneStrArray runes;
|
|
||||||
const DictTrie * dict = segment.GetDictTrie();
|
const DictTrie * dict = segment.GetDictTrie();
|
||||||
assert(dict != NULL);
|
assert(dict != NULL);
|
||||||
if(!DecodeRunesInString(str, runes)) {
|
const auto tmp = dict->Find(str);
|
||||||
XLOG(ERROR) << "Decode failed.";
|
|
||||||
return POS_X;
|
if (tmp == NULL || tmp->GetTag().empty()) {
|
||||||
}
|
RuneStrArray runes;
|
||||||
tmp = dict->Find(runes.begin(), runes.end());
|
|
||||||
if(tmp == NULL || tmp->tag.empty()) {
|
if (!DecodeRunesInString(str, runes)) {
|
||||||
|
XLOG(ERROR) << "Decode failed.";
|
||||||
|
return POS_X;
|
||||||
|
}
|
||||||
|
|
||||||
return SpecialRule(runes);
|
return SpecialRule(runes);
|
||||||
} else {
|
} else {
|
||||||
return tmp->tag;
|
return tmp->GetTag();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,22 +52,27 @@ private:
|
||||||
const char* SpecialRule(const RuneStrArray& unicode) const {
|
const char* SpecialRule(const RuneStrArray& unicode) const {
|
||||||
size_t m = 0;
|
size_t m = 0;
|
||||||
size_t eng = 0;
|
size_t eng = 0;
|
||||||
for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
|
|
||||||
if(unicode[i].rune < 0x80) {
|
for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
|
||||||
|
if (unicode[i].rune < 0x80) {
|
||||||
eng ++;
|
eng ++;
|
||||||
if('0' <= unicode[i].rune && unicode[i].rune <= '9') {
|
|
||||||
|
if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
|
||||||
m++;
|
m++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ascii char is not found
|
// ascii char is not found
|
||||||
if(eng == 0) {
|
if (eng == 0) {
|
||||||
return POS_X;
|
return POS_X;
|
||||||
}
|
}
|
||||||
|
|
||||||
// all the ascii is number char
|
// all the ascii is number char
|
||||||
if(m == eng) {
|
if (m == eng) {
|
||||||
return POS_M;
|
return POS_M;
|
||||||
}
|
}
|
||||||
|
|
||||||
// the ascii chars contain english letter
|
// the ascii chars contain english letter
|
||||||
return POS_ENG;
|
return POS_ENG;
|
||||||
}
|
}
|
||||||
|
@ -92,4 +81,3 @@ private:
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,43 +1,20 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_PRE_FILTER_H
|
|
||||||
#define CPPJIEBA_PRE_FILTER_H
|
|
||||||
|
|
||||||
#include "Trie.hpp"
|
|
||||||
#include "limonp/Logging.hpp"
|
#include "limonp/Logging.hpp"
|
||||||
|
#include <unordered_set>
|
||||||
|
#include "Unicode.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
class PreFilter {
|
class PreFilter {
|
||||||
public:
|
public:
|
||||||
//TODO use WordRange instead of Range
|
PreFilter(const std::unordered_set<Rune>& symbols,
|
||||||
struct Range {
|
|
||||||
RuneStrArray::const_iterator begin;
|
|
||||||
RuneStrArray::const_iterator end;
|
|
||||||
}; // struct Range
|
|
||||||
|
|
||||||
PreFilter(const unordered_set<Rune>& symbols,
|
|
||||||
const string& sentence)
|
const string& sentence)
|
||||||
: symbols_(symbols) {
|
: symbols_(symbols) {
|
||||||
if(!DecodeRunesInString(sentence, sentence_)) {
|
if (!DecodeRunesInString(sentence, sentence_)) {
|
||||||
XLOG(ERROR) << "decode failed. ";
|
XLOG(ERROR) << "decode failed. "<<sentence;
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor_ = sentence_.begin();
|
cursor_ = sentence_.begin();
|
||||||
}
|
}
|
||||||
~PreFilter() {
|
~PreFilter() {
|
||||||
|
@ -45,28 +22,105 @@ public:
|
||||||
bool HasNext() const {
|
bool HasNext() const {
|
||||||
return cursor_ != sentence_.end();
|
return cursor_ != sentence_.end();
|
||||||
}
|
}
|
||||||
Range Next() {
|
bool Next(WordRange& wordRange) {
|
||||||
Range range;
|
|
||||||
range.begin = cursor_;
|
if (cursor_ == sentence_.end()) {
|
||||||
while(cursor_ != sentence_.end()) {
|
return false;
|
||||||
if(IsIn(symbols_, cursor_->rune)) {
|
}
|
||||||
if(range.begin == cursor_) {
|
|
||||||
|
wordRange.left = cursor_;
|
||||||
|
|
||||||
|
while (cursor_->rune == 0x20 && cursor_ != sentence_.end()) {
|
||||||
|
cursor_++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cursor_ == sentence_.end()) {
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (++cursor_ != sentence_.end()) {
|
||||||
|
if (cursor_->rune == 0x20) {
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
wordRange.right = sentence_.end();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Next(WordRange& wordRange, bool& isNull) {
|
||||||
|
isNull = false;
|
||||||
|
if (cursor_ == sentence_.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
wordRange.left = cursor_;
|
||||||
|
if (cursor_->rune == 0x20) {
|
||||||
|
while (cursor_ != sentence_.end()) {
|
||||||
|
if (cursor_->rune != 0x20) {
|
||||||
|
if (wordRange.left == cursor_) {
|
||||||
|
cursor_ ++;
|
||||||
|
}
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
isNull = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
cursor_ ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int max_num = 0;
|
||||||
|
uint32_t utf8_num = cursor_->len;
|
||||||
|
|
||||||
|
while (cursor_ != sentence_.end()) {
|
||||||
|
if (cursor_->rune == 0x20) {
|
||||||
|
if (wordRange.left == cursor_) {
|
||||||
cursor_ ++;
|
cursor_ ++;
|
||||||
}
|
}
|
||||||
range.end = cursor_;
|
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor_ ++;
|
||||||
|
max_num++;
|
||||||
|
if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节,暂定限制为1024个字
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
wordRange.right = sentence_.end();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
WordRange Next() {
|
||||||
|
WordRange range(cursor_, cursor_);
|
||||||
|
|
||||||
|
while (cursor_ != sentence_.end()) {
|
||||||
|
//if (IsIn(symbols_, cursor_->rune)) {
|
||||||
|
if (cursor_->rune == 0x20) {
|
||||||
|
if (range.left == cursor_) {
|
||||||
|
cursor_ ++;
|
||||||
|
}
|
||||||
|
|
||||||
|
range.right = cursor_;
|
||||||
return range;
|
return range;
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor_ ++;
|
cursor_ ++;
|
||||||
}
|
}
|
||||||
range.end = sentence_.end();
|
|
||||||
|
range.right = sentence_.end();
|
||||||
return range;
|
return range;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
RuneStrArray::const_iterator cursor_;
|
RuneStrArray::const_iterator cursor_;
|
||||||
RuneStrArray sentence_;
|
RuneStrArray sentence_;
|
||||||
const unordered_set<Rune>& symbols_;
|
const std::unordered_set<Rune>& symbols_;
|
||||||
}; // class PreFilter
|
}; // class PreFilter
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif // CPPJIEBA_PRE_FILTER_H
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_QUERYSEGMENT_H
|
|
||||||
#define CPPJIEBA_QUERYSEGMENT_H
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -28,74 +9,70 @@
|
||||||
#include "FullSegment.hpp"
|
#include "FullSegment.hpp"
|
||||||
#include "MixSegment.hpp"
|
#include "MixSegment.hpp"
|
||||||
#include "Unicode.hpp"
|
#include "Unicode.hpp"
|
||||||
|
#include "DictTrie.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
class QuerySegment: public SegmentBase {
|
class QuerySegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
QuerySegment(const string& dict, const string& model, const string& userDict = "")
|
QuerySegment(const DictTrie* dictTrie,
|
||||||
: mixSeg_(dict, model, userDict),
|
const HMMModel* model,
|
||||||
trie_(mixSeg_.GetDictTrie()) {
|
const string& stopWordPath)
|
||||||
}
|
: mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
|
||||||
QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
|
|
||||||
: mixSeg_(dictTrie, model), trie_(dictTrie) {
|
|
||||||
}
|
}
|
||||||
~QuerySegment() {
|
~QuerySegment() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words) const {
|
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||||
Cut(sentence, words, true);
|
size_t) const override {
|
||||||
}
|
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
|
|
||||||
vector<Word> tmp;
|
|
||||||
Cut(sentence, tmp, hmm);
|
|
||||||
GetStringsFromWords(tmp, words);
|
|
||||||
}
|
|
||||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
|
||||||
PreFilter::Range range;
|
|
||||||
vector<WordRange> wrs;
|
|
||||||
wrs.reserve(sentence.size() / 2);
|
|
||||||
while(pre_filter.HasNext()) {
|
|
||||||
range = pre_filter.Next();
|
|
||||||
Cut(range.begin, range.end, wrs, hmm);
|
|
||||||
}
|
|
||||||
words.clear();
|
|
||||||
words.reserve(wrs.size());
|
|
||||||
GetWordsFromWordRanges(sentence, wrs, words);
|
|
||||||
}
|
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
|
||||||
//use mix Cut first
|
//use mix Cut first
|
||||||
vector<WordRange> mixRes;
|
vector<WordRange> mixRes;
|
||||||
mixSeg_.Cut(begin, end, mixRes, hmm);
|
mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
|
||||||
|
|
||||||
vector<WordRange> fullRes;
|
vector<WordRange> fullRes;
|
||||||
for(vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
|
||||||
if(mixResItr->Length() > 2) {
|
for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
||||||
for(size_t i = 0; i + 1 < mixResItr->Length(); i++) {
|
if (mixResItr->Length() > 2) {
|
||||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
|
for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
|
||||||
if(trie_->Find(wr.left, wr.right + 1) != NULL) {
|
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
|
||||||
|
|
||||||
|
if (trie_->Find(text) != NULL) {
|
||||||
|
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(mixResItr->Length() > 3) {
|
|
||||||
for(size_t i = 0; i + 2 < mixResItr->Length(); i++) {
|
if (mixResItr->Length() > 3) {
|
||||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
|
for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
|
||||||
if(trie_->Find(wr.left, wr.right + 1) != NULL) {
|
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
|
||||||
|
|
||||||
|
if (trie_->Find(text) != NULL) {
|
||||||
|
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res.push_back(*mixResItr);
|
res.push_back(*mixResItr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t) const override {
|
||||||
|
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
bool IsAllAscii(const Unicode& s) const {
|
bool IsAllAscii(const RuneArray& s) const {
|
||||||
for(size_t i = 0; i < s.size(); i++) {
|
for (size_t i = 0; i < s.size(); i++) {
|
||||||
if(s[i] >= 0x80) {
|
if (s[i] >= 0x80) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
MixSegment mixSeg_;
|
MixSegment mixSeg_;
|
||||||
|
@ -104,4 +81,3 @@ private:
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_SEGMENTBASE_H
|
|
||||||
#define CPPJIEBA_SEGMENTBASE_H
|
|
||||||
|
|
||||||
#include "limonp/Logging.hpp"
|
#include "limonp/Logging.hpp"
|
||||||
#include "PreFilter.hpp"
|
#include "PreFilter.hpp"
|
||||||
|
@ -35,24 +16,74 @@ public:
|
||||||
SegmentBase() {
|
SegmentBase() {
|
||||||
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
|
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
|
||||||
}
|
}
|
||||||
virtual ~SegmentBase() {
|
virtual ~SegmentBase() { }
|
||||||
|
|
||||||
|
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||||
|
size_t max_word_len) const = 0;
|
||||||
|
//添加基于sentence的cut方法,减少中间变量的存储与格式转换--jxx20210517
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
size_t max_word_len) const = 0;
|
||||||
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
|
size_t max_word_len) const = 0;
|
||||||
|
//重写CutToStr函数,简化获取vector<string>& words的流程,降低内存占用--jxx20210517
|
||||||
|
void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
|
words.clear();
|
||||||
|
words.reserve(sentence.size() / 2);//todo 参考源码,参数待定
|
||||||
|
RuneStrArray::const_iterator null_p;
|
||||||
|
WordRange range(null_p, null_p);
|
||||||
|
while (pre_filter.Next(range)) {
|
||||||
|
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
|
||||||
|
}
|
||||||
|
void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
|
||||||
|
}
|
||||||
|
void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
|
vector<WordRange> wrs;
|
||||||
|
wrs.reserve(sentence.size() / 2);
|
||||||
|
|
||||||
|
while (pre_filter.HasNext()) {
|
||||||
|
auto range = pre_filter.Next();
|
||||||
|
Cut(range.left, range.right, wrs, hmm, max_word_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
words.clear();
|
||||||
|
words.reserve(wrs.size());
|
||||||
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
|
wrs.clear();
|
||||||
|
vector<WordRange>().swap(wrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Cut(const string& sentence, vector<string>& words) const = 0;
|
void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
|
||||||
|
bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
Cut(begin, end, res, hmm, max_word_len);
|
||||||
|
}
|
||||||
|
|
||||||
bool ResetSeparators(const string& s) {
|
bool ResetSeparators(const string& s) {
|
||||||
symbols_.clear();
|
symbols_.clear();
|
||||||
RuneStrArray runes;
|
RuneStrArray runes;
|
||||||
if(!DecodeRunesInString(s, runes)) {
|
|
||||||
|
if (!DecodeRunesInString(s, runes)) {
|
||||||
XLOG(ERROR) << "decode " << s << " failed";
|
XLOG(ERROR) << "decode " << s << " failed";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for(size_t i = 0; i < runes.size(); i++) {
|
|
||||||
if(!symbols_.insert(runes[i].rune).second) {
|
for (size_t i = 0; i < runes.size(); i++) {
|
||||||
|
if (!symbols_.insert(runes[i].rune).second) {
|
||||||
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
|
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
|
@ -61,4 +92,3 @@ protected:
|
||||||
|
|
||||||
} // cppjieba
|
} // cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_SEGMENTTAGGED_H
|
|
||||||
#define CPPJIEBA_SEGMENTTAGGED_H
|
|
||||||
|
|
||||||
#include "SegmentBase.hpp"
|
#include "SegmentBase.hpp"
|
||||||
|
|
||||||
|
@ -38,4 +19,3 @@ public:
|
||||||
|
|
||||||
} // cppjieba
|
} // cppjieba
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,212 +1,205 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
#include <cmath>
|
||||||
*
|
#include "Jieba.hpp"
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
namespace cppjieba {
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
using namespace limonp;
|
||||||
* (at your option) any later version.
|
using namespace std;
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
class TextRankExtractor {
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
public:
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
typedef struct _Word {
|
||||||
* GNU General Public License for more details.
|
string word;
|
||||||
*
|
vector<size_t> offsets;
|
||||||
* You should have received a copy of the GNU General Public License
|
double weight;
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
} Word; // struct Word
|
||||||
*
|
private:
|
||||||
*
|
typedef std::map<string, Word> WordMap;
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
|
class WordGraph {
|
||||||
#define CPPJIEBA_TEXTRANK_EXTRACTOR_H
|
private:
|
||||||
|
typedef double Score;
|
||||||
#include <cmath>
|
typedef string Node;
|
||||||
#include "Jieba.hpp"
|
typedef std::set<Node> NodeSet;
|
||||||
|
|
||||||
namespace cppjieba {
|
typedef std::map<Node, double> Edges;
|
||||||
using namespace limonp;
|
typedef std::map<Node, Edges> Graph;
|
||||||
using namespace std;
|
//typedef std::unordered_map<Node,double> Edges;
|
||||||
|
//typedef std::unordered_map<Node,Edges> Graph;
|
||||||
class TextRankExtractor {
|
|
||||||
public:
|
double d;
|
||||||
typedef struct _Word {
|
Graph graph;
|
||||||
string word;
|
NodeSet nodeSet;
|
||||||
vector<size_t> offsets;
|
public:
|
||||||
double weight;
|
WordGraph(): d(0.85) {};
|
||||||
} Word; // struct Word
|
WordGraph(double in_d): d(in_d) {};
|
||||||
private:
|
|
||||||
typedef std::map<string, Word> WordMap;
|
void addEdge(Node start, Node end, double weight) {
|
||||||
|
Edges temp;
|
||||||
class WordGraph {
|
Edges::iterator gotEdges;
|
||||||
private:
|
nodeSet.insert(start);
|
||||||
typedef double Score;
|
nodeSet.insert(end);
|
||||||
typedef string Node;
|
graph[start][end] += weight;
|
||||||
typedef std::set<Node> NodeSet;
|
graph[end][start] += weight;
|
||||||
|
}
|
||||||
typedef std::map<Node, double> Edges;
|
|
||||||
typedef std::map<Node, Edges> Graph;
|
void rank(WordMap &ws, size_t rankTime = 10) {
|
||||||
//typedef std::unordered_map<Node,double> Edges;
|
WordMap outSum;
|
||||||
//typedef std::unordered_map<Node,Edges> Graph;
|
Score wsdef, min_rank, max_rank;
|
||||||
|
|
||||||
double d;
|
if (graph.size() == 0) {
|
||||||
Graph graph;
|
return;
|
||||||
NodeSet nodeSet;
|
}
|
||||||
public:
|
|
||||||
WordGraph(): d(0.85) {};
|
wsdef = 1.0 / graph.size();
|
||||||
WordGraph(double in_d): d(in_d) {};
|
|
||||||
|
for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
|
||||||
void addEdge(Node start, Node end, double weight) {
|
// edges->first start节点;edge->first end节点;edge->second 权重
|
||||||
Edges temp;
|
ws[edges->first].word = edges->first;
|
||||||
Edges::iterator gotEdges;
|
ws[edges->first].weight = wsdef;
|
||||||
nodeSet.insert(start);
|
outSum[edges->first].weight = 0;
|
||||||
nodeSet.insert(end);
|
|
||||||
graph[start][end] += weight;
|
for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
|
||||||
graph[end][start] += weight;
|
outSum[edges->first].weight += edge->second;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
void rank(WordMap &ws, size_t rankTime = 10) {
|
|
||||||
WordMap outSum;
|
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
|
||||||
Score wsdef, min_rank, max_rank;
|
for (size_t i = 0; i < rankTime; i++) {
|
||||||
|
for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
|
||||||
if(graph.size() == 0)
|
double s = 0;
|
||||||
return;
|
|
||||||
|
for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
|
||||||
wsdef = 1.0 / graph.size();
|
// edge->first end节点;edge->second 权重
|
||||||
|
{
|
||||||
for(Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
|
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
|
||||||
// edges->first start节点;edge->first end节点;edge->second 权重
|
}
|
||||||
ws[edges->first].word = edges->first;
|
|
||||||
ws[edges->first].weight = wsdef;
|
ws[*node].weight = (1 - d) + d * s;
|
||||||
outSum[edges->first].weight = 0;
|
}
|
||||||
for(Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
|
}
|
||||||
outSum[edges->first].weight += edge->second;
|
|
||||||
}
|
min_rank = max_rank = ws.begin()->second.weight;
|
||||||
}
|
|
||||||
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
|
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||||
for(size_t i = 0; i < rankTime; i++) {
|
if (i->second.weight < min_rank) {
|
||||||
for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
|
min_rank = i->second.weight;
|
||||||
double s = 0;
|
}
|
||||||
for(Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
|
|
||||||
// edge->first end节点;edge->second 权重
|
if (i->second.weight > max_rank) {
|
||||||
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
|
max_rank = i->second.weight;
|
||||||
ws[*node].weight = (1 - d) + d * s;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||||
min_rank = max_rank = ws.begin()->second.weight;
|
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
|
||||||
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
}
|
||||||
if(i->second.weight < min_rank) {
|
}
|
||||||
min_rank = i->second.weight;
|
};
|
||||||
}
|
|
||||||
if(i->second.weight > max_rank) {
|
public:
|
||||||
max_rank = i->second.weight;
|
TextRankExtractor(const DictTrie* dictTrie,
|
||||||
}
|
const HMMModel* model,
|
||||||
}
|
const string& stopWordPath)
|
||||||
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
: segment_(dictTrie, model) {
|
||||||
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
|
LoadStopWordDict(stopWordPath);
|
||||||
}
|
}
|
||||||
}
|
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
||||||
};
|
LoadStopWordDict(stopWordPath);
|
||||||
|
}
|
||||||
public:
|
~TextRankExtractor() {
|
||||||
TextRankExtractor(const string& dictPath,
|
}
|
||||||
const string& hmmFilePath,
|
|
||||||
const string& stopWordPath,
|
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||||
const string& userDict = "")
|
vector<Word> topWords;
|
||||||
: segment_(dictPath, hmmFilePath, userDict) {
|
Extract(sentence, topWords, topN);
|
||||||
LoadStopWordDict(stopWordPath);
|
|
||||||
}
|
for (size_t i = 0; i < topWords.size(); i++) {
|
||||||
TextRankExtractor(const DictTrie* dictTrie,
|
keywords.push_back(topWords[i].word);
|
||||||
const HMMModel* model,
|
}
|
||||||
const string& stopWordPath)
|
}
|
||||||
: segment_(dictTrie, model) {
|
|
||||||
LoadStopWordDict(stopWordPath);
|
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||||
}
|
vector<Word> topWords;
|
||||||
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
Extract(sentence, topWords, topN);
|
||||||
LoadStopWordDict(stopWordPath);
|
|
||||||
}
|
for (size_t i = 0; i < topWords.size(); i++) {
|
||||||
~TextRankExtractor() {
|
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
|
||||||
vector<Word> topWords;
|
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
|
||||||
Extract(sentence, topWords, topN);
|
vector<string> words;
|
||||||
for(size_t i = 0; i < topWords.size(); i++) {
|
segment_.CutToStr(sentence, words);
|
||||||
keywords.push_back(topWords[i].word);
|
|
||||||
}
|
TextRankExtractor::WordGraph graph;
|
||||||
}
|
WordMap wordmap;
|
||||||
|
size_t offset = 0;
|
||||||
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
|
||||||
vector<Word> topWords;
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
Extract(sentence, topWords, topN);
|
size_t t = offset;
|
||||||
for(size_t i = 0; i < topWords.size(); i++) {
|
offset += words[i].size();
|
||||||
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
|
||||||
}
|
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||||
}
|
continue;
|
||||||
|
}
|
||||||
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
|
|
||||||
vector<string> words;
|
for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
|
||||||
segment_.Cut(sentence, words);
|
if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
|
||||||
|
skip++;
|
||||||
TextRankExtractor::WordGraph graph;
|
continue;
|
||||||
WordMap wordmap;
|
}
|
||||||
size_t offset = 0;
|
|
||||||
|
graph.addEdge(words[i], words[j], 1);
|
||||||
for(size_t i = 0; i < words.size(); i++) {
|
}
|
||||||
size_t t = offset;
|
|
||||||
offset += words[i].size();
|
wordmap[words[i]].offsets.push_back(t);
|
||||||
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
}
|
||||||
continue;
|
|
||||||
}
|
if (offset != sentence.size()) {
|
||||||
for(size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
|
XLOG(ERROR) << "words illegal";
|
||||||
if(IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
|
return;
|
||||||
skip++;
|
}
|
||||||
continue;
|
|
||||||
}
|
graph.rank(wordmap, rankTime);
|
||||||
graph.addEdge(words[i], words[j], 1);
|
|
||||||
}
|
keywords.clear();
|
||||||
wordmap[words[i]].offsets.push_back(t);
|
keywords.reserve(wordmap.size());
|
||||||
}
|
|
||||||
if(offset != sentence.size()) {
|
for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||||
XLOG(ERROR) << "words illegal";
|
keywords.push_back(itr->second);
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
topN = min(topN, keywords.size());
|
||||||
graph.rank(wordmap, rankTime);
|
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||||
|
keywords.resize(topN);
|
||||||
keywords.clear();
|
}
|
||||||
keywords.reserve(wordmap.size());
|
private:
|
||||||
for(WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
void LoadStopWordDict(const string& filePath) {
|
||||||
keywords.push_back(itr->second);
|
ifstream ifs(filePath.c_str());
|
||||||
}
|
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||||
|
string line ;
|
||||||
topN = min(topN, keywords.size());
|
|
||||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
while (getline(ifs, line)) {
|
||||||
keywords.resize(topN);
|
stopWords_.insert(line);
|
||||||
}
|
}
|
||||||
private:
|
|
||||||
void LoadStopWordDict(const string& filePath) {
|
assert(stopWords_.size());
|
||||||
ifstream ifs(filePath.c_str());
|
}
|
||||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
|
||||||
string line ;
|
static bool Compare(const Word &x, const Word &y) {
|
||||||
while(getline(ifs, line)) {
|
return x.weight > y.weight;
|
||||||
stopWords_.insert(line);
|
}
|
||||||
}
|
|
||||||
assert(stopWords_.size());
|
MixSegment segment_;
|
||||||
}
|
unordered_set<string> stopWords_;
|
||||||
|
}; // class TextRankExtractor
|
||||||
static bool Compare(const Word &x, const Word &y) {
|
|
||||||
return x.weight > y.weight;
|
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
|
||||||
}
|
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
|
||||||
|
"}";
|
||||||
MixSegment segment_;
|
}
|
||||||
unordered_set<string> stopWords_;
|
} // namespace cppjieba
|
||||||
}; // class TextRankExtractor
|
|
||||||
|
|
||||||
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
|
|
||||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
|
|
||||||
}
|
|
||||||
} // namespace cppjieba
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,192 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_TRIE_HPP
|
|
||||||
#define CPPJIEBA_TRIE_HPP
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <queue>
|
|
||||||
#include "limonp/StdExtension.hpp"
|
|
||||||
#include "Unicode.hpp"
|
|
||||||
|
|
||||||
namespace cppjieba {
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
const size_t MAX_WORD_LENGTH = 512;
|
|
||||||
|
|
||||||
struct DictUnit {
|
|
||||||
Unicode word;
|
|
||||||
double weight;
|
|
||||||
string tag;
|
|
||||||
}; // struct DictUnit
|
|
||||||
|
|
||||||
// for debugging
|
|
||||||
// inline ostream & operator << (ostream& os, const DictUnit& unit) {
|
|
||||||
// string s;
|
|
||||||
// s << unit.word;
|
|
||||||
// return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
|
||||||
// }
|
|
||||||
|
|
||||||
struct Dag {
|
|
||||||
RuneStr runestr;
|
|
||||||
// [offset, nexts.first]
|
|
||||||
limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
|
|
||||||
const DictUnit * pInfo;
|
|
||||||
double weight;
|
|
||||||
size_t nextPos; // TODO
|
|
||||||
Dag(): runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
|
|
||||||
}
|
|
||||||
}; // struct Dag
|
|
||||||
|
|
||||||
typedef Rune TrieKey;
|
|
||||||
|
|
||||||
class TrieNode {
|
|
||||||
public :
|
|
||||||
TrieNode(): next(NULL), ptValue(NULL) {
|
|
||||||
}
|
|
||||||
public:
|
|
||||||
typedef unordered_map<TrieKey, TrieNode*> NextMap;
|
|
||||||
NextMap *next;
|
|
||||||
const DictUnit *ptValue;
|
|
||||||
};
|
|
||||||
|
|
||||||
class Trie {
|
|
||||||
public:
|
|
||||||
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
|
|
||||||
: root_(new TrieNode) {
|
|
||||||
CreateTrie(keys, valuePointers);
|
|
||||||
}
|
|
||||||
~Trie() {
|
|
||||||
DeleteNode(root_);
|
|
||||||
}
|
|
||||||
|
|
||||||
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
|
||||||
if(begin == end) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const TrieNode* ptNode = root_;
|
|
||||||
TrieNode::NextMap::const_iterator citer;
|
|
||||||
for(RuneStrArray::const_iterator it = begin; it != end; it++) {
|
|
||||||
if(NULL == ptNode->next) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
citer = ptNode->next->find(it->rune);
|
|
||||||
if(ptNode->next->end() == citer) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
ptNode = citer->second;
|
|
||||||
}
|
|
||||||
return ptNode->ptValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Find(RuneStrArray::const_iterator begin,
|
|
||||||
RuneStrArray::const_iterator end,
|
|
||||||
vector<struct Dag>&res,
|
|
||||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
|
||||||
assert(root_ != NULL);
|
|
||||||
res.resize(end - begin);
|
|
||||||
|
|
||||||
const TrieNode *ptNode = NULL;
|
|
||||||
TrieNode::NextMap::const_iterator citer;
|
|
||||||
for(size_t i = 0; i < size_t(end - begin); i++) {
|
|
||||||
res[i].runestr = *(begin + i);
|
|
||||||
|
|
||||||
if(root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
|
|
||||||
ptNode = citer->second;
|
|
||||||
} else {
|
|
||||||
ptNode = NULL;
|
|
||||||
}
|
|
||||||
if(ptNode != NULL) {
|
|
||||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));
|
|
||||||
} else {
|
|
||||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, static_cast<const DictUnit*>(NULL)));
|
|
||||||
}
|
|
||||||
|
|
||||||
for(size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) {
|
|
||||||
if(ptNode == NULL || ptNode->next == NULL) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
citer = ptNode->next->find((begin + j)->rune);
|
|
||||||
if(ptNode->next->end() == citer) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
ptNode = citer->second;
|
|
||||||
if(NULL != ptNode->ptValue) {
|
|
||||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
|
|
||||||
if(key.begin() == key.end()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
TrieNode::NextMap::const_iterator kmIter;
|
|
||||||
TrieNode *ptNode = root_;
|
|
||||||
for(Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
|
|
||||||
if(NULL == ptNode->next) {
|
|
||||||
ptNode->next = new TrieNode::NextMap;
|
|
||||||
}
|
|
||||||
kmIter = ptNode->next->find(*citer);
|
|
||||||
if(ptNode->next->end() == kmIter) {
|
|
||||||
TrieNode *nextNode = new TrieNode;
|
|
||||||
|
|
||||||
ptNode->next->insert(make_pair(*citer, nextNode));
|
|
||||||
ptNode = nextNode;
|
|
||||||
} else {
|
|
||||||
ptNode = kmIter->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(ptNode != NULL);
|
|
||||||
ptNode->ptValue = ptValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
|
||||||
if(valuePointers.empty() || keys.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
assert(keys.size() == valuePointers.size());
|
|
||||||
|
|
||||||
for(size_t i = 0; i < keys.size(); i++) {
|
|
||||||
InsertNode(keys[i], valuePointers[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DeleteNode(TrieNode* node) {
|
|
||||||
if(NULL == node) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if(NULL != node->next) {
|
|
||||||
for(TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
|
|
||||||
DeleteNode(it->second);
|
|
||||||
}
|
|
||||||
delete node->next;
|
|
||||||
}
|
|
||||||
delete node;
|
|
||||||
}
|
|
||||||
|
|
||||||
TrieNode* root_;
|
|
||||||
}; // class Trie
|
|
||||||
} // namespace cppjieba
|
|
||||||
|
|
||||||
#endif // CPPJIEBA_TRIE_HPP
|
|
|
@ -1,23 +1,4 @@
|
||||||
/*
|
#pragma once
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef CPPJIEBA_UNICODE_H
|
|
||||||
#define CPPJIEBA_UNICODE_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -25,6 +6,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include "limonp/LocalVector.hpp"
|
#include "limonp/LocalVector.hpp"
|
||||||
|
#include "limonp/StringUtil.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
|
@ -33,6 +15,12 @@ using std::vector;
|
||||||
|
|
||||||
typedef uint32_t Rune;
|
typedef uint32_t Rune;
|
||||||
|
|
||||||
|
struct KeyWord {
|
||||||
|
string word;
|
||||||
|
vector<size_t> offsets;
|
||||||
|
double weight;
|
||||||
|
}; // struct Word
|
||||||
|
|
||||||
struct Word {
|
struct Word {
|
||||||
string word;
|
string word;
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
|
@ -50,28 +38,28 @@ inline std::ostream& operator << (std::ostream& os, const Word& w) {
|
||||||
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
|
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
|
||||||
}
|
}
|
||||||
|
|
||||||
struct RuneStr {
|
struct RuneInfo {
|
||||||
Rune rune;
|
Rune rune;
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
uint32_t unicode_offset;
|
uint32_t unicode_offset = 0;
|
||||||
uint32_t unicode_length;
|
uint32_t unicode_length = 0;
|
||||||
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
|
RuneInfo(): rune(0), offset(0), len(0) {
|
||||||
}
|
}
|
||||||
RuneStr(Rune r, uint32_t o, uint32_t l)
|
RuneInfo(Rune r, uint32_t o, uint32_t l)
|
||||||
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
|
: rune(r), offset(o), len(l) {
|
||||||
}
|
}
|
||||||
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
|
RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
|
||||||
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
|
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
|
||||||
}
|
}
|
||||||
}; // struct RuneStr
|
}; // struct RuneInfo
|
||||||
|
|
||||||
inline std::ostream& operator << (std::ostream& os, const RuneStr& r) {
|
inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
|
||||||
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
|
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef limonp::LocalVector<Rune> Unicode;
|
typedef limonp::LocalVector<Rune> RuneArray;
|
||||||
typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
|
typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
|
||||||
|
|
||||||
// [left, right]
|
// [left, right]
|
||||||
struct WordRange {
|
struct WordRange {
|
||||||
|
@ -81,129 +69,157 @@ struct WordRange {
|
||||||
: left(l), right(r) {
|
: left(l), right(r) {
|
||||||
}
|
}
|
||||||
size_t Length() const {
|
size_t Length() const {
|
||||||
return right - left + 1;
|
return right - left;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsAllAscii() const {
|
bool IsAllAscii() const {
|
||||||
for(RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
|
for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
|
||||||
if(iter->rune >= 0x80) {
|
if (iter->rune >= 0x80) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}; // struct WordRange
|
}; // struct WordRange
|
||||||
|
|
||||||
struct RuneStrLite {
|
|
||||||
uint32_t rune;
|
|
||||||
uint32_t len;
|
|
||||||
RuneStrLite(): rune(0), len(0) {
|
|
||||||
}
|
|
||||||
RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) {
|
|
||||||
}
|
|
||||||
}; // struct RuneStrLite
|
|
||||||
|
|
||||||
inline RuneStrLite DecodeRuneInString(const char* str, size_t len) {
|
inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
|
||||||
RuneStrLite rp(0, 0);
|
arr.clear();
|
||||||
if(str == NULL || len == 0) {
|
return limonp::Utf8ToUnicode32(s, arr);
|
||||||
return rp;
|
}
|
||||||
}
|
|
||||||
if(!(str[0] & 0x80)) { // 0xxxxxxx
|
inline RuneArray DecodeRunesInString(const string& s) {
|
||||||
|
RuneArray result;
|
||||||
|
DecodeRunesInString(s, result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
//重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518
|
||||||
|
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
||||||
|
|
||||||
|
uint32_t tmp;
|
||||||
|
uint32_t offset = 0;
|
||||||
|
runes.clear();
|
||||||
|
uint32_t len(0);
|
||||||
|
for (size_t i = 0; i < s.size();) {
|
||||||
|
if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
|
||||||
// 7bit, total 7bit
|
// 7bit, total 7bit
|
||||||
rp.rune = (uint8_t)(str[0]) & 0x7f;
|
tmp = (uint8_t)(s.data()[i]) & 0x7f;
|
||||||
rp.len = 1;
|
i++;
|
||||||
} else if((uint8_t)str[0] <= 0xdf && 1 < len) {
|
len = 1;
|
||||||
// 110xxxxxx
|
} else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
|
||||||
// 5bit, total 5bit
|
// 5bit, total 5bit
|
||||||
rp.rune = (uint8_t)(str[0]) & 0x1f;
|
tmp = (uint8_t)(s.data()[i]) & 0x1f;
|
||||||
|
|
||||||
// 6bit, total 11bit
|
// 6bit, total 11bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||||
rp.len = 2;
|
i += 2;
|
||||||
} else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx
|
len = 2;
|
||||||
|
} else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
|
||||||
// 4bit, total 4bit
|
// 4bit, total 4bit
|
||||||
rp.rune = (uint8_t)(str[0]) & 0x0f;
|
tmp = (uint8_t)(s.data()[i]) & 0x0f;
|
||||||
|
|
||||||
// 6bit, total 10bit
|
// 6bit, total 10bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||||
|
|
||||||
// 6bit, total 16bit
|
// 6bit, total 16bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[2]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
|
||||||
|
|
||||||
rp.len = 3;
|
i += 3;
|
||||||
} else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx
|
len = 3;
|
||||||
|
} else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
|
||||||
// 3bit, total 3bit
|
// 3bit, total 3bit
|
||||||
rp.rune = (uint8_t)(str[0]) & 0x07;
|
tmp = (uint8_t)(s.data()[i]) & 0x07;
|
||||||
|
|
||||||
// 6bit, total 9bit
|
// 6bit, total 9bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||||
|
|
||||||
// 6bit, total 15bit
|
// 6bit, total 15bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[2]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
|
||||||
|
|
||||||
// 6bit, total 21bit
|
// 6bit, total 21bit
|
||||||
rp.rune <<= 6;
|
tmp <<= 6;
|
||||||
rp.rune |= (uint8_t)(str[3]) & 0x3f;
|
tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
|
||||||
|
|
||||||
rp.len = 4;
|
i += 4;
|
||||||
} else {
|
len = 4;
|
||||||
rp.rune = 0;
|
} else {
|
||||||
rp.len = 0;
|
|
||||||
}
|
|
||||||
return rp;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
|
|
||||||
runes.clear();
|
|
||||||
runes.reserve(len / 2);
|
|
||||||
for(uint32_t i = 0, j = 0; i < len;) {
|
|
||||||
RuneStrLite rp = DecodeRuneInString(s + i, len - i);
|
|
||||||
if(rp.len == 0) {
|
|
||||||
runes.clear();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
RuneStr x(rp.rune, i, rp.len, j, 1);
|
|
||||||
runes.push_back(x);
|
|
||||||
i += rp.len;
|
|
||||||
++j;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
|
||||||
return DecodeRunesInString(s.c_str(), s.size(), runes);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
|
|
||||||
unicode.clear();
|
|
||||||
RuneStrArray runes;
|
|
||||||
if(!DecodeRunesInString(s, len, runes)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
unicode.reserve(runes.size());
|
RuneInfo x(tmp, offset, len, i, 1);
|
||||||
for(size_t i = 0; i < runes.size(); i++) {
|
runes.push_back(x);
|
||||||
unicode.push_back(runes[i].rune);
|
offset += len;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class RunePtrWrapper {
|
||||||
|
public:
|
||||||
|
const RuneInfo * m_ptr = nullptr;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
|
||||||
|
|
||||||
|
uint32_t operator *() {
|
||||||
|
return m_ptr->rune;
|
||||||
|
}
|
||||||
|
|
||||||
|
RunePtrWrapper operator ++(int) {
|
||||||
|
m_ptr ++;
|
||||||
|
return RunePtrWrapper(m_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator !=(const RunePtrWrapper & b) const {
|
||||||
|
return this->m_ptr != b.m_ptr;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
|
||||||
|
string str;
|
||||||
|
RunePtrWrapper it_begin(begin), it_end(end);
|
||||||
|
limonp::Unicode32ToUtf8(it_begin, it_end, str);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
|
||||||
|
RunePtrWrapper it_begin(begin), it_end(end);
|
||||||
|
limonp::Unicode32ToUtf8(it_begin, it_end, str);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
class Unicode32Counter {
|
||||||
|
public :
|
||||||
|
size_t length = 0;
|
||||||
|
void clear() {
|
||||||
|
length = 0;
|
||||||
|
}
|
||||||
|
void push_back(uint32_t) {
|
||||||
|
++length;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline size_t Utf8CharNum(const char * str, size_t length) {
|
||||||
|
Unicode32Counter c;
|
||||||
|
|
||||||
|
if (limonp::Utf8ToUnicode32(str, length, c)) {
|
||||||
|
return c.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t Utf8CharNum(const string & str) {
|
||||||
|
return Utf8CharNum(str.data(), str.size());
|
||||||
|
}
|
||||||
|
|
||||||
inline bool IsSingleWord(const string& str) {
|
inline bool IsSingleWord(const string& str) {
|
||||||
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
|
return Utf8CharNum(str) == 1;
|
||||||
return rp.len == str.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
|
|
||||||
return DecodeRunesInString(s.c_str(), s.size(), unicode);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Unicode DecodeRunesInString(const string& s) {
|
|
||||||
Unicode result;
|
|
||||||
DecodeRunesInString(s, result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -217,29 +233,31 @@ inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left,
|
||||||
|
|
||||||
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||||
assert(right->offset >= left->offset);
|
assert(right->offset >= left->offset);
|
||||||
uint32_t len = right->offset - left->offset + right->len;
|
//uint32_t len = right->offset - left->offset + right->len;
|
||||||
return s.substr(left->offset, len);
|
return s.substr(left->offset, right->offset - left->offset + right->len);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
|
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
|
||||||
for(size_t i = 0; i < wrs.size(); i++) {
|
for (size_t i = 0; i < wrs.size(); i++) {
|
||||||
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
|
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
|
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
|
||||||
vector<Word> result;
|
for (size_t i = 0; i < wrs.size(); i++) {
|
||||||
GetWordsFromWordRanges(s, wrs, result);
|
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
|
||||||
return result;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
|
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
|
||||||
strs.resize(words.size());
|
strs.resize(words.size());
|
||||||
for(size_t i = 0; i < words.size(); ++i) {
|
|
||||||
|
for (size_t i = 0; i < words.size(); ++i) {
|
||||||
strs[i] = words[i].word;
|
strs[i] = words[i].word;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const size_t MAX_WORD_LENGTH = 512;
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif // CPPJIEBA_UNICODE_H
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD
|
||||||
|
|
||||||
HEADERS += \
|
HEADERS += \
|
||||||
$$PWD/DictTrie.hpp \
|
$$PWD/DictTrie.hpp \
|
||||||
|
$$PWD/IdfTrie.hpp \
|
||||||
$$PWD/FullSegment.hpp \
|
$$PWD/FullSegment.hpp \
|
||||||
$$PWD/HMMModel.hpp \
|
$$PWD/HMMModel.hpp \
|
||||||
$$PWD/HMMSegment.hpp \
|
$$PWD/HMMSegment.hpp \
|
||||||
|
@ -17,5 +18,4 @@ HEADERS += \
|
||||||
$$PWD/TextRankExtractor.hpp \
|
$$PWD/TextRankExtractor.hpp \
|
||||||
$$PWD/Trie.hpp \
|
$$PWD/Trie.hpp \
|
||||||
$$PWD/Unicode.hpp
|
$$PWD/Unicode.hpp
|
||||||
|
|
||||||
include(limonp/limonp.pri)
|
include(limonp/limonp.pri)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
/************************************
|
/************************************
|
||||||
* file enc : ascii
|
* file enc : ascii
|
||||||
* author : wuyanyi09@gmail.com
|
* author : wuyanyi09@gmail.com
|
||||||
|
@ -33,54 +15,54 @@ namespace limonp {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class ArgvContext {
|
class ArgvContext {
|
||||||
public :
|
public :
|
||||||
ArgvContext(int argc, const char* const * argv) {
|
ArgvContext(int argc, const char* const * argv) {
|
||||||
for(int i = 0; i < argc; i++) {
|
for(int i = 0; i < argc; i++) {
|
||||||
if(StartsWith(argv[i], "-")) {
|
if(StartsWith(argv[i], "-")) {
|
||||||
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
|
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
|
||||||
mpss_[argv[i]] = argv[i + 1];
|
mpss_[argv[i]] = argv[i+1];
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
sset_.insert(argv[i]);
|
sset_.insert(argv[i]);
|
||||||
}
|
|
||||||
} else {
|
|
||||||
args_.push_back(argv[i]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
args_.push_back(argv[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
~ArgvContext() {
|
}
|
||||||
}
|
~ArgvContext() {
|
||||||
|
}
|
||||||
|
|
||||||
friend ostream& operator << (ostream& os, const ArgvContext& args);
|
friend ostream& operator << (ostream& os, const ArgvContext& args);
|
||||||
string operator [](size_t i) const {
|
string operator [](size_t i) const {
|
||||||
if(i < args_.size()) {
|
if(i < args_.size()) {
|
||||||
return args_[i];
|
return args_[i];
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
string operator [](const string& key) const {
|
return "";
|
||||||
map<string, string>::const_iterator it = mpss_.find(key);
|
}
|
||||||
if(it != mpss_.end()) {
|
string operator [](const string& key) const {
|
||||||
return it->second;
|
map<string, string>::const_iterator it = mpss_.find(key);
|
||||||
}
|
if(it != mpss_.end()) {
|
||||||
return "";
|
return it->second;
|
||||||
}
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
bool HasKey(const string& key) const {
|
bool HasKey(const string& key) const {
|
||||||
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
|
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
vector<string> args_;
|
vector<string> args_;
|
||||||
map<string, string> mpss_;
|
map<string, string> mpss_;
|
||||||
set<string> sset_;
|
set<string> sset_;
|
||||||
}; // class ArgvContext
|
}; // class ArgvContext
|
||||||
|
|
||||||
inline ostream& operator << (ostream& os, const ArgvContext& args) {
|
inline ostream& operator << (ostream& os, const ArgvContext& args) {
|
||||||
return os << args.args_ << args.mpss_ << args.sset_;
|
return os<<args.args_<<args.mpss_<<args.sset_;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_BLOCKINGQUEUE_HPP
|
#ifndef LIMONP_BLOCKINGQUEUE_HPP
|
||||||
#define LIMONP_BLOCKINGQUEUE_HPP
|
#define LIMONP_BLOCKINGQUEUE_HPP
|
||||||
|
|
||||||
|
@ -25,41 +7,41 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
template<class T>
|
template<class T>
|
||||||
class BlockingQueue: NonCopyable {
|
class BlockingQueue: NonCopyable {
|
||||||
public:
|
public:
|
||||||
BlockingQueue()
|
BlockingQueue()
|
||||||
: mutex_(), notEmpty_(mutex_), queue_() {
|
: mutex_(), notEmpty_(mutex_), queue_() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Push(const T& x) {
|
void Push(const T& x) {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
queue_.push(x);
|
queue_.push(x);
|
||||||
notEmpty_.Notify(); // Wait morphing saves us
|
notEmpty_.Notify(); // Wait morphing saves us
|
||||||
}
|
}
|
||||||
|
|
||||||
T Pop() {
|
T Pop() {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
// always use a while-loop, due to spurious wakeup
|
// always use a while-loop, due to spurious wakeup
|
||||||
while(queue_.empty()) {
|
while (queue_.empty()) {
|
||||||
notEmpty_.Wait();
|
notEmpty_.Wait();
|
||||||
}
|
|
||||||
assert(!queue_.empty());
|
|
||||||
T front(queue_.front());
|
|
||||||
queue_.pop();
|
|
||||||
return front;
|
|
||||||
}
|
}
|
||||||
|
assert(!queue_.empty());
|
||||||
|
T front(queue_.front());
|
||||||
|
queue_.pop();
|
||||||
|
return front;
|
||||||
|
}
|
||||||
|
|
||||||
size_t Size() const {
|
size_t Size() const {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
return queue_.size();
|
return queue_.size();
|
||||||
}
|
}
|
||||||
bool Empty() const {
|
bool Empty() const {
|
||||||
return Size() == 0;
|
return Size() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
mutable MutexLock mutex_;
|
mutable MutexLock mutex_;
|
||||||
Condition notEmpty_;
|
Condition notEmpty_;
|
||||||
std::queue<T> queue_;
|
std::queue<T> queue_;
|
||||||
}; // class BlockingQueue
|
}; // class BlockingQueue
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
||||||
#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
||||||
|
|
||||||
|
@ -25,59 +7,59 @@ namespace limonp {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class BoundedBlockingQueue : NonCopyable {
|
class BoundedBlockingQueue : NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit BoundedBlockingQueue(size_t maxSize)
|
explicit BoundedBlockingQueue(size_t maxSize)
|
||||||
: mutex_(),
|
: mutex_(),
|
||||||
notEmpty_(mutex_),
|
notEmpty_(mutex_),
|
||||||
notFull_(mutex_),
|
notFull_(mutex_),
|
||||||
queue_(maxSize) {
|
queue_(maxSize) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Push(const T& x) {
|
void Push(const T& x) {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
while(queue_.Full()) {
|
while (queue_.Full()) {
|
||||||
notFull_.Wait();
|
notFull_.Wait();
|
||||||
}
|
|
||||||
assert(!queue_.Full());
|
|
||||||
queue_.Push(x);
|
|
||||||
notEmpty_.Notify();
|
|
||||||
}
|
}
|
||||||
|
assert(!queue_.Full());
|
||||||
|
queue_.Push(x);
|
||||||
|
notEmpty_.Notify();
|
||||||
|
}
|
||||||
|
|
||||||
T Pop() {
|
T Pop() {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
while(queue_.Empty()) {
|
while (queue_.Empty()) {
|
||||||
notEmpty_.Wait();
|
notEmpty_.Wait();
|
||||||
}
|
|
||||||
assert(!queue_.Empty());
|
|
||||||
T res = queue_.Pop();
|
|
||||||
notFull_.Notify();
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
assert(!queue_.Empty());
|
||||||
|
T res = queue_.Pop();
|
||||||
|
notFull_.Notify();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
bool Empty() const {
|
bool Empty() const {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
return queue_.Empty();
|
return queue_.Empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Full() const {
|
bool Full() const {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
return queue_.Full();
|
return queue_.Full();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size() const {
|
size_t size() const {
|
||||||
MutexLockGuard lock(mutex_);
|
MutexLockGuard lock(mutex_);
|
||||||
return queue_.size();
|
return queue_.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t capacity() const {
|
size_t capacity() const {
|
||||||
return queue_.capacity();
|
return queue_.capacity();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
mutable MutexLock mutex_;
|
mutable MutexLock mutex_;
|
||||||
Condition notEmpty_;
|
Condition notEmpty_;
|
||||||
Condition notFull_;
|
Condition notFull_;
|
||||||
BoundedQueue<T> queue_;
|
BoundedQueue<T> queue_;
|
||||||
}; // class BoundedBlockingQueue
|
}; // class BoundedBlockingQueue
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_BOUNDED_QUEUE_HPP
|
#ifndef LIMONP_BOUNDED_QUEUE_HPP
|
||||||
#define LIMONP_BOUNDED_QUEUE_HPP
|
#define LIMONP_BOUNDED_QUEUE_HPP
|
||||||
|
|
||||||
|
@ -27,55 +9,55 @@ namespace limonp {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
template<class T>
|
template<class T>
|
||||||
class BoundedQueue {
|
class BoundedQueue {
|
||||||
public:
|
public:
|
||||||
explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
|
explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
|
||||||
head_ = 0;
|
head_ = 0;
|
||||||
tail_ = 0;
|
tail_ = 0;
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
assert(capacity_);
|
assert(capacity_);
|
||||||
}
|
}
|
||||||
~BoundedQueue() {
|
~BoundedQueue() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Clear() {
|
void Clear() {
|
||||||
head_ = 0;
|
head_ = 0;
|
||||||
tail_ = 0;
|
tail_ = 0;
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
}
|
}
|
||||||
bool Empty() const {
|
bool Empty() const {
|
||||||
return !size_;
|
return !size_;
|
||||||
}
|
}
|
||||||
bool Full() const {
|
bool Full() const {
|
||||||
return capacity_ == size_;
|
return capacity_ == size_;
|
||||||
}
|
}
|
||||||
size_t Size() const {
|
size_t Size() const {
|
||||||
return size_;
|
return size_;
|
||||||
}
|
}
|
||||||
size_t Capacity() const {
|
size_t Capacity() const {
|
||||||
return capacity_;
|
return capacity_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Push(const T& t) {
|
void Push(const T& t) {
|
||||||
assert(!Full());
|
assert(!Full());
|
||||||
circular_buffer_[tail_] = t;
|
circular_buffer_[tail_] = t;
|
||||||
tail_ = (tail_ + 1) % capacity_;
|
tail_ = (tail_ + 1) % capacity_;
|
||||||
size_ ++;
|
size_ ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
T Pop() {
|
T Pop() {
|
||||||
assert(!Empty());
|
assert(!Empty());
|
||||||
size_t oldPos = head_;
|
size_t oldPos = head_;
|
||||||
head_ = (head_ + 1) % capacity_;
|
head_ = (head_ + 1) % capacity_;
|
||||||
size_ --;
|
size_ --;
|
||||||
return circular_buffer_[oldPos];
|
return circular_buffer_[oldPos];
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
size_t head_;
|
size_t head_;
|
||||||
size_t tail_;
|
size_t tail_;
|
||||||
size_t size_;
|
size_t size_;
|
||||||
const size_t capacity_;
|
const size_t capacity_;
|
||||||
vector<T> circular_buffer_;
|
vector<T> circular_buffer_;
|
||||||
|
|
||||||
}; // class BoundedQueue
|
}; // class BoundedQueue
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,222 +1,204 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_CLOSURE_HPP
|
#ifndef LIMONP_CLOSURE_HPP
|
||||||
#define LIMONP_CLOSURE_HPP
|
#define LIMONP_CLOSURE_HPP
|
||||||
|
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
class ClosureInterface {
|
class ClosureInterface {
|
||||||
public:
|
public:
|
||||||
virtual ~ClosureInterface() {
|
virtual ~ClosureInterface() {
|
||||||
}
|
}
|
||||||
virtual void Run() = 0;
|
virtual void Run() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Funct>
|
template <class Funct>
|
||||||
class Closure0: public ClosureInterface {
|
class Closure0: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
Closure0(Funct fun) {
|
Closure0(Funct fun) {
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
}
|
}
|
||||||
virtual ~Closure0() {
|
virtual ~Closure0() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(*fun_)();
|
(*fun_)();
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Funct, class Arg1>
|
template <class Funct, class Arg1>
|
||||||
class Closure1: public ClosureInterface {
|
class Closure1: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
Closure1(Funct fun, Arg1 arg1) {
|
Closure1(Funct fun, Arg1 arg1) {
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
}
|
}
|
||||||
virtual ~Closure1() {
|
virtual ~Closure1() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(*fun_)(arg1_);
|
(*fun_)(arg1_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Funct, class Arg1, class Arg2>
|
template <class Funct, class Arg1, class Arg2>
|
||||||
class Closure2: public ClosureInterface {
|
class Closure2: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
|
Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
arg2_ = arg2;
|
arg2_ = arg2;
|
||||||
}
|
}
|
||||||
virtual ~Closure2() {
|
virtual ~Closure2() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(*fun_)(arg1_, arg2_);
|
(*fun_)(arg1_, arg2_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
Arg2 arg2_;
|
Arg2 arg2_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Funct, class Arg1, class Arg2, class Arg3>
|
template <class Funct, class Arg1, class Arg2, class Arg3>
|
||||||
class Closure3: public ClosureInterface {
|
class Closure3: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
arg2_ = arg2;
|
arg2_ = arg2;
|
||||||
arg3_ = arg3;
|
arg3_ = arg3;
|
||||||
}
|
}
|
||||||
virtual ~Closure3() {
|
virtual ~Closure3() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(*fun_)(arg1_, arg2_, arg3_);
|
(*fun_)(arg1_, arg2_, arg3_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
Arg2 arg2_;
|
Arg2 arg2_;
|
||||||
Arg3 arg3_;
|
Arg3 arg3_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Obj, class Funct>
|
template <class Obj, class Funct>
|
||||||
class ObjClosure0: public ClosureInterface {
|
class ObjClosure0: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
ObjClosure0(Obj* p, Funct fun) {
|
ObjClosure0(Obj* p, Funct fun) {
|
||||||
p_ = p;
|
p_ = p;
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
}
|
}
|
||||||
virtual ~ObjClosure0() {
|
virtual ~ObjClosure0() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(p_->*fun_)();
|
(p_->*fun_)();
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Obj* p_;
|
Obj* p_;
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Obj, class Funct, class Arg1>
|
template <class Obj, class Funct, class Arg1>
|
||||||
class ObjClosure1: public ClosureInterface {
|
class ObjClosure1: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
|
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
|
||||||
p_ = p;
|
p_ = p;
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
}
|
}
|
||||||
virtual ~ObjClosure1() {
|
virtual ~ObjClosure1() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(p_->*fun_)(arg1_);
|
(p_->*fun_)(arg1_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Obj* p_;
|
Obj* p_;
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Obj, class Funct, class Arg1, class Arg2>
|
template <class Obj, class Funct, class Arg1, class Arg2>
|
||||||
class ObjClosure2: public ClosureInterface {
|
class ObjClosure2: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
|
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||||
p_ = p;
|
p_ = p;
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
arg2_ = arg2;
|
arg2_ = arg2;
|
||||||
}
|
}
|
||||||
virtual ~ObjClosure2() {
|
virtual ~ObjClosure2() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(p_->*fun_)(arg1_, arg2_);
|
(p_->*fun_)(arg1_, arg2_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Obj* p_;
|
Obj* p_;
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
Arg2 arg2_;
|
Arg2 arg2_;
|
||||||
};
|
};
|
||||||
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
|
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
|
||||||
class ObjClosure3: public ClosureInterface {
|
class ObjClosure3: public ClosureInterface {
|
||||||
public:
|
public:
|
||||||
ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||||
p_ = p;
|
p_ = p;
|
||||||
fun_ = fun;
|
fun_ = fun;
|
||||||
arg1_ = arg1;
|
arg1_ = arg1;
|
||||||
arg2_ = arg2;
|
arg2_ = arg2;
|
||||||
arg3_ = arg3;
|
arg3_ = arg3;
|
||||||
}
|
}
|
||||||
virtual ~ObjClosure3() {
|
virtual ~ObjClosure3() {
|
||||||
}
|
}
|
||||||
virtual void Run() {
|
virtual void Run() {
|
||||||
(p_->*fun_)(arg1_, arg2_, arg3_);
|
(p_->*fun_)(arg1_, arg2_, arg3_);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
Obj* p_;
|
Obj* p_;
|
||||||
Funct fun_;
|
Funct fun_;
|
||||||
Arg1 arg1_;
|
Arg1 arg1_;
|
||||||
Arg2 arg2_;
|
Arg2 arg2_;
|
||||||
Arg3 arg3_;
|
Arg3 arg3_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class R>
|
template<class R>
|
||||||
ClosureInterface* NewClosure(R(*fun)()) {
|
ClosureInterface* NewClosure(R (*fun)()) {
|
||||||
return new Closure0<R(*)()>(fun);
|
return new Closure0<R (*)()>(fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Arg1>
|
template<class R, class Arg1>
|
||||||
ClosureInterface* NewClosure(R(*fun)(Arg1), Arg1 arg1) {
|
ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
|
||||||
return new Closure1<R(*)(Arg1), Arg1>(fun, arg1);
|
return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Arg1, class Arg2>
|
template<class R, class Arg1, class Arg2>
|
||||||
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||||
return new Closure2<R(*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
|
return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Arg1, class Arg2, class Arg3>
|
template<class R, class Arg1, class Arg2, class Arg3>
|
||||||
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||||
return new Closure3<R(*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
|
return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Obj>
|
template<class R, class Obj>
|
||||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)()) {
|
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
|
||||||
return new ObjClosure0<Obj, R(Obj::*)()>(obj, fun);
|
return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Obj, class Arg1>
|
template<class R, class Obj, class Arg1>
|
||||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1), Arg1 arg1) {
|
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
|
||||||
return new ObjClosure1<Obj, R(Obj::*)(Arg1), Arg1>(obj, fun, arg1);
|
return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Obj, class Arg1, class Arg2>
|
template<class R, class Obj, class Arg1, class Arg2>
|
||||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||||
return new ObjClosure2<Obj, R(Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
|
return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class R, class Obj, class Arg1, class Arg2, class Arg3>
|
template<class R, class Obj, class Arg1, class Arg2, class Arg3>
|
||||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||||
return new ObjClosure3<Obj, R(Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
|
return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_COLOR_PRINT_HPP
|
#ifndef LIMONP_COLOR_PRINT_HPP
|
||||||
#define LIMONP_COLOR_PRINT_HPP
|
#define LIMONP_COLOR_PRINT_HPP
|
||||||
|
|
||||||
|
@ -27,21 +9,21 @@ namespace limonp {
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
enum Color {
|
enum Color {
|
||||||
BLACK = 30,
|
BLACK = 30,
|
||||||
RED,
|
RED,
|
||||||
GREEN,
|
GREEN,
|
||||||
YELLOW,
|
YELLOW,
|
||||||
BLUE,
|
BLUE,
|
||||||
PURPLE
|
PURPLE
|
||||||
}; // enum Color
|
}; // enum Color
|
||||||
|
|
||||||
static void ColorPrintln(enum Color color, const char * fmt, ...) {
|
static void ColorPrintln(enum Color color, const char * fmt, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
printf("\033[0;%dm", color);
|
printf("\033[0;%dm", color);
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
vprintf(fmt, ap);
|
vprintf(fmt, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
|
printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_CONDITION_HPP
|
#ifndef LIMONP_CONDITION_HPP
|
||||||
#define LIMONP_CONDITION_HPP
|
#define LIMONP_CONDITION_HPP
|
||||||
|
|
||||||
|
@ -24,31 +6,31 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
class Condition : NonCopyable {
|
class Condition : NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit Condition(MutexLock& mutex)
|
explicit Condition(MutexLock& mutex)
|
||||||
: mutex_(mutex) {
|
: mutex_(mutex) {
|
||||||
XCHECK(!pthread_cond_init(&pcond_, NULL));
|
XCHECK(!pthread_cond_init(&pcond_, NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
~Condition() {
|
~Condition() {
|
||||||
XCHECK(!pthread_cond_destroy(&pcond_));
|
XCHECK(!pthread_cond_destroy(&pcond_));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Wait() {
|
void Wait() {
|
||||||
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
|
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Notify() {
|
void Notify() {
|
||||||
XCHECK(!pthread_cond_signal(&pcond_));
|
XCHECK(!pthread_cond_signal(&pcond_));
|
||||||
}
|
}
|
||||||
|
|
||||||
void NotifyAll() {
|
void NotifyAll() {
|
||||||
XCHECK(!pthread_cond_broadcast(&pcond_));
|
XCHECK(!pthread_cond_broadcast(&pcond_));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MutexLock& mutex_;
|
MutexLock& mutex_;
|
||||||
pthread_cond_t pcond_;
|
pthread_cond_t pcond_;
|
||||||
}; // class Condition
|
}; // class Condition
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
/************************************
|
/************************************
|
||||||
* file enc : utf8
|
* file enc : utf8
|
||||||
* author : wuyanyi09@gmail.com
|
* author : wuyanyi09@gmail.com
|
||||||
|
@ -34,86 +16,86 @@ namespace limonp {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class Config {
|
class Config {
|
||||||
public:
|
public:
|
||||||
explicit Config(const string& filePath) {
|
explicit Config(const string& filePath) {
|
||||||
LoadFile(filePath);
|
LoadFile(filePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
operator bool () {
|
operator bool () {
|
||||||
return !map_.empty();
|
return !map_.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
string Get(const string& key, const string& defaultvalue) const {
|
string Get(const string& key, const string& defaultvalue) const {
|
||||||
map<string, string>::const_iterator it = map_.find(key);
|
map<string, string>::const_iterator it = map_.find(key);
|
||||||
if(map_.end() != it) {
|
if(map_.end() != it) {
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
|
||||||
return defaultvalue;
|
|
||||||
}
|
}
|
||||||
int Get(const string& key, int defaultvalue) const {
|
return defaultvalue;
|
||||||
string str = Get(key, "");
|
}
|
||||||
if("" == str) {
|
int Get(const string& key, int defaultvalue) const {
|
||||||
return defaultvalue;
|
string str = Get(key, "");
|
||||||
}
|
if("" == str) {
|
||||||
return atoi(str.c_str());
|
return defaultvalue;
|
||||||
}
|
}
|
||||||
const char* operator [](const char* key) const {
|
return atoi(str.c_str());
|
||||||
if(NULL == key) {
|
}
|
||||||
return NULL;
|
const char* operator [] (const char* key) const {
|
||||||
}
|
if(NULL == key) {
|
||||||
map<string, string>::const_iterator it = map_.find(key);
|
return NULL;
|
||||||
if(map_.end() != it) {
|
|
||||||
return it->second.c_str();
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
map<string, string>::const_iterator it = map_.find(key);
|
||||||
|
if(map_.end() != it) {
|
||||||
|
return it->second.c_str();
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
string GetConfigInfo() const {
|
string GetConfigInfo() const {
|
||||||
string res;
|
string res;
|
||||||
res << *this;
|
res << *this;
|
||||||
return res;
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void LoadFile(const string& filePath) {
|
||||||
|
ifstream ifs(filePath.c_str());
|
||||||
|
assert(ifs);
|
||||||
|
string line;
|
||||||
|
vector<string> vecBuf;
|
||||||
|
size_t lineno = 0;
|
||||||
|
while(getline(ifs, line)) {
|
||||||
|
lineno ++;
|
||||||
|
Trim(line);
|
||||||
|
if(line.empty() || StartsWith(line, "#")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
vecBuf.clear();
|
||||||
|
Split(line, vecBuf, "=");
|
||||||
|
if(2 != vecBuf.size()) {
|
||||||
|
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
|
||||||
|
assert(false);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
string& key = vecBuf[0];
|
||||||
|
string& value = vecBuf[1];
|
||||||
|
Trim(key);
|
||||||
|
Trim(value);
|
||||||
|
if(!map_.insert(make_pair(key, value)).second) {
|
||||||
|
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
|
||||||
|
assert(false);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
ifs.close();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
friend ostream& operator << (ostream& os, const Config& config);
|
||||||
void LoadFile(const string& filePath) {
|
|
||||||
ifstream ifs(filePath.c_str());
|
|
||||||
assert(ifs);
|
|
||||||
string line;
|
|
||||||
vector<string> vecBuf;
|
|
||||||
size_t lineno = 0;
|
|
||||||
while(getline(ifs, line)) {
|
|
||||||
lineno ++;
|
|
||||||
Trim(line);
|
|
||||||
if(line.empty() || StartsWith(line, "#")) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
vecBuf.clear();
|
|
||||||
Split(line, vecBuf, "=");
|
|
||||||
if(2 != vecBuf.size()) {
|
|
||||||
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
|
|
||||||
assert(false);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
string& key = vecBuf[0];
|
|
||||||
string& value = vecBuf[1];
|
|
||||||
Trim(key);
|
|
||||||
Trim(value);
|
|
||||||
if(!map_.insert(make_pair(key, value)).second) {
|
|
||||||
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
|
|
||||||
assert(false);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ifs.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
friend ostream& operator << (ostream& os, const Config& config);
|
map<string, string> map_;
|
||||||
|
|
||||||
map<string, string> map_;
|
|
||||||
}; // class Config
|
}; // class Config
|
||||||
|
|
||||||
inline ostream& operator << (ostream& os, const Config& config) {
|
inline ostream& operator << (ostream& os, const Config& config) {
|
||||||
return os << config.map_;
|
return os << config.map_;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_FILELOCK_HPP
|
#ifndef LIMONP_FILELOCK_HPP
|
||||||
#define LIMONP_FILELOCK_HPP
|
#define LIMONP_FILELOCK_HPP
|
||||||
|
|
||||||
|
@ -33,58 +15,58 @@ namespace limonp {
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
class FileLock {
|
class FileLock {
|
||||||
public:
|
public:
|
||||||
FileLock() : fd_(-1), ok_(true) {
|
FileLock() : fd_(-1), ok_(true) {
|
||||||
|
}
|
||||||
|
~FileLock() {
|
||||||
|
if(fd_ > 0) {
|
||||||
|
Close();
|
||||||
}
|
}
|
||||||
~FileLock() {
|
}
|
||||||
if(fd_ > 0) {
|
void Open(const string& fname) {
|
||||||
Close();
|
assert(fd_ == -1);
|
||||||
}
|
fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
||||||
|
if(fd_ < 0) {
|
||||||
|
ok_ = false;
|
||||||
|
err_ = strerror(errno);
|
||||||
}
|
}
|
||||||
void Open(const string& fname) {
|
}
|
||||||
assert(fd_ == -1);
|
void Close() {
|
||||||
fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
::close(fd_);
|
||||||
if(fd_ < 0) {
|
}
|
||||||
ok_ = false;
|
void Lock() {
|
||||||
err_ = strerror(errno);
|
if(LockOrUnlock(fd_, true) < 0) {
|
||||||
}
|
ok_ = false;
|
||||||
|
err_ = strerror(errno);
|
||||||
}
|
}
|
||||||
void Close() {
|
}
|
||||||
::close(fd_);
|
void UnLock() {
|
||||||
}
|
if(LockOrUnlock(fd_, false) < 0) {
|
||||||
void Lock() {
|
ok_ = false;
|
||||||
if(LockOrUnlock(fd_, true) < 0) {
|
err_ = strerror(errno);
|
||||||
ok_ = false;
|
|
||||||
err_ = strerror(errno);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void UnLock() {
|
|
||||||
if(LockOrUnlock(fd_, false) < 0) {
|
|
||||||
ok_ = false;
|
|
||||||
err_ = strerror(errno);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bool Ok() const {
|
|
||||||
return ok_;
|
|
||||||
}
|
|
||||||
string Error() const {
|
|
||||||
return err_;
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
static int LockOrUnlock(int fd, bool lock) {
|
|
||||||
errno = 0;
|
|
||||||
struct flock f;
|
|
||||||
memset(&f, 0, sizeof(f));
|
|
||||||
f.l_type = (lock ? F_WRLCK : F_UNLCK);
|
|
||||||
f.l_whence = SEEK_SET;
|
|
||||||
f.l_start = 0;
|
|
||||||
f.l_len = 0; // Lock/unlock entire file
|
|
||||||
return fcntl(fd, F_SETLK, &f);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
bool Ok() const {
|
||||||
|
return ok_;
|
||||||
|
}
|
||||||
|
string Error() const {
|
||||||
|
return err_;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
static int LockOrUnlock(int fd, bool lock) {
|
||||||
|
errno = 0;
|
||||||
|
struct flock f;
|
||||||
|
memset(&f, 0, sizeof(f));
|
||||||
|
f.l_type = (lock ? F_WRLCK : F_UNLCK);
|
||||||
|
f.l_whence = SEEK_SET;
|
||||||
|
f.l_start = 0;
|
||||||
|
f.l_len = 0; // Lock/unlock entire file
|
||||||
|
return fcntl(fd, F_SETLK, &f);
|
||||||
|
}
|
||||||
|
|
||||||
int fd_;
|
int fd_;
|
||||||
bool ok_;
|
bool ok_;
|
||||||
string err_;
|
string err_;
|
||||||
}; // class FileLock
|
}; // class FileLock
|
||||||
|
|
||||||
}// namespace limonp
|
}// namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_FORCE_PUBLIC_H
|
#ifndef LIMONP_FORCE_PUBLIC_H
|
||||||
#define LIMONP_FORCE_PUBLIC_H
|
#define LIMONP_FORCE_PUBLIC_H
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_LOCAL_VECTOR_HPP
|
#ifndef LIMONP_LOCAL_VECTOR_HPP
|
||||||
#define LIMONP_LOCAL_VECTOR_HPP
|
#define LIMONP_LOCAL_VECTOR_HPP
|
||||||
|
|
||||||
|
@ -33,123 +15,126 @@ using namespace std;
|
||||||
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
|
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
|
||||||
template <class T>
|
template <class T>
|
||||||
class LocalVector {
|
class LocalVector {
|
||||||
public:
|
public:
|
||||||
typedef const T* const_iterator ;
|
typedef const T* const_iterator ;
|
||||||
typedef T value_type;
|
typedef T value_type;
|
||||||
typedef size_t size_type;
|
typedef size_t size_type;
|
||||||
private:
|
private:
|
||||||
T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
|
T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
|
||||||
T * ptr_;
|
T * ptr_;
|
||||||
size_t size_;
|
size_t size_;
|
||||||
size_t capacity_;
|
size_t capacity_;
|
||||||
public:
|
public:
|
||||||
LocalVector() {
|
LocalVector() {
|
||||||
init_();
|
init_();
|
||||||
};
|
};
|
||||||
LocalVector(const LocalVector<T>& vec) {
|
LocalVector(const LocalVector<T>& vec) {
|
||||||
init_();
|
init_();
|
||||||
*this = vec;
|
*this = vec;
|
||||||
|
}
|
||||||
|
LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
|
||||||
|
init_();
|
||||||
|
while(begin != end) {
|
||||||
|
push_back(*begin++);
|
||||||
}
|
}
|
||||||
LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
|
}
|
||||||
init_();
|
LocalVector(size_t size, const T& t) { // TODO: make it faster
|
||||||
while(begin != end) {
|
init_();
|
||||||
push_back(*begin++);
|
while(size--) {
|
||||||
}
|
push_back(t);
|
||||||
}
|
}
|
||||||
LocalVector(size_t size, const T& t) { // TODO: make it faster
|
}
|
||||||
init_();
|
~LocalVector() {
|
||||||
while(size--) {
|
if(ptr_ != buffer_) {
|
||||||
push_back(t);
|
free(ptr_);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
~LocalVector() {
|
};
|
||||||
if(ptr_ != buffer_) {
|
public:
|
||||||
free(ptr_);
|
LocalVector<T>& operator = (const LocalVector<T>& vec) {
|
||||||
}
|
if(this == &vec){
|
||||||
};
|
return *this;
|
||||||
public:
|
}
|
||||||
LocalVector<T>& operator = (const LocalVector<T>& vec) {
|
clear();
|
||||||
clear();
|
size_ = vec.size();
|
||||||
size_ = vec.size();
|
capacity_ = vec.capacity();
|
||||||
capacity_ = vec.capacity();
|
if(vec.buffer_ == vec.ptr_) {
|
||||||
if(vec.buffer_ == vec.ptr_) {
|
memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
|
||||||
memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
|
ptr_ = buffer_;
|
||||||
ptr_ = buffer_;
|
} else {
|
||||||
} else {
|
ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
|
||||||
ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
|
assert(ptr_);
|
||||||
assert(ptr_);
|
memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
|
||||||
memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
}
|
||||||
private:
|
return *this;
|
||||||
void init_() {
|
}
|
||||||
ptr_ = buffer_;
|
private:
|
||||||
size_ = 0;
|
void init_() {
|
||||||
capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
|
ptr_ = buffer_;
|
||||||
|
size_ = 0;
|
||||||
|
capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
T& operator [] (size_t i) {
|
||||||
|
return ptr_[i];
|
||||||
|
}
|
||||||
|
const T& operator [] (size_t i) const {
|
||||||
|
return ptr_[i];
|
||||||
|
}
|
||||||
|
void push_back(const T& t) {
|
||||||
|
if(size_ == capacity_) {
|
||||||
|
assert(capacity_);
|
||||||
|
reserve(capacity_ * 2);
|
||||||
}
|
}
|
||||||
public:
|
ptr_[size_ ++ ] = t;
|
||||||
T& operator [](size_t i) {
|
}
|
||||||
return ptr_[i];
|
void reserve(size_t size) {
|
||||||
|
if(size <= capacity_) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
const T& operator [](size_t i) const {
|
T * next = (T*)malloc(sizeof(T) * size);
|
||||||
return ptr_[i];
|
assert(next);
|
||||||
|
T * old = ptr_;
|
||||||
|
ptr_ = next;
|
||||||
|
memcpy(ptr_, old, sizeof(T) * capacity_);
|
||||||
|
capacity_ = size;
|
||||||
|
if(old != buffer_) {
|
||||||
|
free(old);
|
||||||
}
|
}
|
||||||
void push_back(const T& t) {
|
}
|
||||||
if(size_ == capacity_) {
|
bool empty() const {
|
||||||
assert(capacity_);
|
return 0 == size();
|
||||||
reserve(capacity_ * 2);
|
}
|
||||||
}
|
size_t size() const {
|
||||||
ptr_[size_ ++ ] = t;
|
return size_;
|
||||||
}
|
}
|
||||||
void reserve(size_t size) {
|
size_t capacity() const {
|
||||||
if(size <= capacity_) {
|
return capacity_;
|
||||||
return;
|
}
|
||||||
}
|
const_iterator begin() const {
|
||||||
T * next = (T*)malloc(sizeof(T) * size);
|
return ptr_;
|
||||||
assert(next);
|
}
|
||||||
T * old = ptr_;
|
const_iterator end() const {
|
||||||
ptr_ = next;
|
return ptr_ + size_;
|
||||||
memcpy(ptr_, old, sizeof(T) * capacity_);
|
}
|
||||||
capacity_ = size;
|
void clear() {
|
||||||
if(old != buffer_) {
|
if(ptr_ != buffer_) {
|
||||||
free(old);
|
free(ptr_);
|
||||||
}
|
|
||||||
}
|
|
||||||
bool empty() const {
|
|
||||||
return 0 == size();
|
|
||||||
}
|
|
||||||
size_t size() const {
|
|
||||||
return size_;
|
|
||||||
}
|
|
||||||
size_t capacity() const {
|
|
||||||
return capacity_;
|
|
||||||
}
|
|
||||||
const_iterator begin() const {
|
|
||||||
return ptr_;
|
|
||||||
}
|
|
||||||
const_iterator end() const {
|
|
||||||
return ptr_ + size_;
|
|
||||||
}
|
|
||||||
void clear() {
|
|
||||||
if(ptr_ != buffer_) {
|
|
||||||
free(ptr_);
|
|
||||||
}
|
|
||||||
init_();
|
|
||||||
}
|
}
|
||||||
|
init_();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
ostream & operator << (ostream& os, const LocalVector<T>& vec) {
|
ostream & operator << (ostream& os, const LocalVector<T>& vec) {
|
||||||
if(vec.empty()) {
|
if(vec.empty()) {
|
||||||
return os << "[]";
|
return os << "[]";
|
||||||
}
|
}
|
||||||
os << "[\"" << vec[0];
|
os<<"[\""<<vec[0];
|
||||||
for(size_t i = 1; i < vec.size(); i++) {
|
for(size_t i = 1; i < vec.size(); i++) {
|
||||||
os << "\", \"" << vec[i];
|
os<<"\", \""<<vec[i];
|
||||||
}
|
}
|
||||||
os << "\"]";
|
os<<"\"]";
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_LOGGING_HPP
|
#ifndef LIMONP_LOGGING_HPP
|
||||||
#define LIMONP_LOGGING_HPP
|
#define LIMONP_LOGGING_HPP
|
||||||
|
|
||||||
|
@ -38,55 +20,56 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
LL_DEBUG = 0,
|
LL_DEBUG = 0,
|
||||||
LL_INFO = 1,
|
LL_INFO = 1,
|
||||||
LL_WARNING = 2,
|
LL_WARNING = 2,
|
||||||
LL_ERROR = 3,
|
LL_ERROR = 3,
|
||||||
LL_FATAL = 4,
|
LL_FATAL = 4,
|
||||||
}; // enum
|
}; // enum
|
||||||
|
|
||||||
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"};
|
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
|
||||||
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
|
|
||||||
|
|
||||||
class Logger {
|
class Logger {
|
||||||
public:
|
public:
|
||||||
Logger(size_t level, const char* filename, int lineno)
|
Logger(size_t level, const char* filename, int lineno)
|
||||||
: level_(level) {
|
: level_(level) {
|
||||||
#ifdef LOGGING_LEVEL
|
#ifdef LOGGING_LEVEL
|
||||||
if(level_ < LOGGING_LEVEL) {
|
if (level_ < LOGGING_LEVEL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
assert(level_ <= sizeof(LOG_LEVEL_ARRAY) / sizeof(*LOG_LEVEL_ARRAY));
|
assert(level_ <= sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
|
||||||
char buf[32];
|
char buf[32];
|
||||||
time_t now;
|
time_t now;
|
||||||
time(&now);
|
time(&now);
|
||||||
strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now));
|
struct tm result;
|
||||||
stream_ << buf
|
localtime_r(&now, &result);
|
||||||
<< " " << filename
|
strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
|
||||||
<< ":" << lineno
|
stream_ << buf
|
||||||
<< " " << LOG_LEVEL_ARRAY[level_]
|
<< " " << filename
|
||||||
<< " ";
|
<< ":" << lineno
|
||||||
}
|
<< " " << LOG_LEVEL_ARRAY[level_]
|
||||||
~Logger() {
|
<< " ";
|
||||||
|
}
|
||||||
|
~Logger() {
|
||||||
#ifdef LOGGING_LEVEL
|
#ifdef LOGGING_LEVEL
|
||||||
if(level_ < LOGGING_LEVEL) {
|
if (level_ < LOGGING_LEVEL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
std::cerr << stream_.str() << std::endl;
|
std::cerr << stream_.str() << std::endl;
|
||||||
if(level_ == LL_FATAL) {
|
if (level_ == LL_FATAL) {
|
||||||
abort();
|
abort();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::ostream& Stream() {
|
std::ostream& Stream() {
|
||||||
return stream_;
|
return stream_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::ostringstream stream_;
|
std::ostringstream stream_;
|
||||||
size_t level_;
|
size_t level_;
|
||||||
}; // class Logger
|
}; // class Logger
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_MUTEX_LOCK_HPP
|
#ifndef LIMONP_MUTEX_LOCK_HPP
|
||||||
#define LIMONP_MUTEX_LOCK_HPP
|
#define LIMONP_MUTEX_LOCK_HPP
|
||||||
|
|
||||||
|
@ -26,40 +8,40 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
class MutexLock: NonCopyable {
|
class MutexLock: NonCopyable {
|
||||||
public:
|
public:
|
||||||
MutexLock() {
|
MutexLock() {
|
||||||
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
||||||
}
|
}
|
||||||
~MutexLock() {
|
~MutexLock() {
|
||||||
XCHECK(!pthread_mutex_destroy(&mutex_));
|
XCHECK(!pthread_mutex_destroy(&mutex_));
|
||||||
}
|
}
|
||||||
pthread_mutex_t* GetPthreadMutex() {
|
pthread_mutex_t* GetPthreadMutex() {
|
||||||
return &mutex_;
|
return &mutex_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Lock() {
|
void Lock() {
|
||||||
XCHECK(!pthread_mutex_lock(&mutex_));
|
XCHECK(!pthread_mutex_lock(&mutex_));
|
||||||
}
|
}
|
||||||
void Unlock() {
|
void Unlock() {
|
||||||
XCHECK(!pthread_mutex_unlock(&mutex_));
|
XCHECK(!pthread_mutex_unlock(&mutex_));
|
||||||
}
|
}
|
||||||
friend class MutexLockGuard;
|
friend class MutexLockGuard;
|
||||||
|
|
||||||
pthread_mutex_t mutex_;
|
pthread_mutex_t mutex_;
|
||||||
}; // class MutexLock
|
}; // class MutexLock
|
||||||
|
|
||||||
class MutexLockGuard: NonCopyable {
|
class MutexLockGuard: NonCopyable {
|
||||||
public:
|
public:
|
||||||
explicit MutexLockGuard(MutexLock & mutex)
|
explicit MutexLockGuard(MutexLock & mutex)
|
||||||
: mutex_(mutex) {
|
: mutex_(mutex) {
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
}
|
}
|
||||||
~MutexLockGuard() {
|
~MutexLockGuard() {
|
||||||
mutex_.Unlock();
|
mutex_.Unlock();
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
MutexLock & mutex_;
|
MutexLock & mutex_;
|
||||||
}; // class MutexLockGuard
|
}; // class MutexLockGuard
|
||||||
|
|
||||||
#define MutexLockGuard(x) XCHECK(false);
|
#define MutexLockGuard(x) XCHECK(false);
|
||||||
|
|
|
@ -1,35 +1,19 @@
|
||||||
/*
|
/************************************
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
************************************/
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_NONCOPYABLE_H
|
#ifndef LIMONP_NONCOPYABLE_H
|
||||||
#define LIMONP_NONCOPYABLE_H
|
#define LIMONP_NONCOPYABLE_H
|
||||||
|
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
class NonCopyable {
|
class NonCopyable {
|
||||||
protected:
|
protected:
|
||||||
NonCopyable() {
|
NonCopyable() {
|
||||||
}
|
}
|
||||||
~NonCopyable() {
|
~NonCopyable() {
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
NonCopyable(const NonCopyable&);
|
NonCopyable(const NonCopyable& );
|
||||||
const NonCopyable& operator=(const NonCopyable&);
|
const NonCopyable& operator=(const NonCopyable& );
|
||||||
}; // class NonCopyable
|
}; // class NonCopyable
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_STD_EXTEMSION_HPP
|
#ifndef LIMONP_STD_EXTEMSION_HPP
|
||||||
#define LIMONP_STD_EXTEMSION_HPP
|
#define LIMONP_STD_EXTEMSION_HPP
|
||||||
|
|
||||||
|
@ -51,123 +33,123 @@ namespace std {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
ostream& operator << (ostream& os, const vector<T>& v) {
|
ostream& operator << (ostream& os, const vector<T>& v) {
|
||||||
if(v.empty()) {
|
if(v.empty()) {
|
||||||
return os << "[]";
|
return os << "[]";
|
||||||
}
|
}
|
||||||
os << "[" << v[0];
|
os<<"["<<v[0];
|
||||||
for(size_t i = 1; i < v.size(); i++) {
|
for(size_t i = 1; i < v.size(); i++) {
|
||||||
os << ", " << v[i];
|
os<<", "<<v[i];
|
||||||
}
|
}
|
||||||
os << "]";
|
os<<"]";
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline ostream& operator << (ostream& os, const vector<string>& v) {
|
inline ostream& operator << (ostream& os, const vector<string>& v) {
|
||||||
if(v.empty()) {
|
if(v.empty()) {
|
||||||
return os << "[]";
|
return os << "[]";
|
||||||
}
|
}
|
||||||
os << "[\"" << v[0];
|
os<<"[\""<<v[0];
|
||||||
for(size_t i = 1; i < v.size(); i++) {
|
for(size_t i = 1; i < v.size(); i++) {
|
||||||
os << "\", \"" << v[i];
|
os<<"\", \""<<v[i];
|
||||||
}
|
}
|
||||||
os << "\"]";
|
os<<"\"]";
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
ostream& operator << (ostream& os, const deque<T>& dq) {
|
ostream& operator << (ostream& os, const deque<T>& dq) {
|
||||||
if(dq.empty()) {
|
if(dq.empty()) {
|
||||||
return os << "[]";
|
return os << "[]";
|
||||||
}
|
}
|
||||||
os << "[\"" << dq[0];
|
os<<"[\""<<dq[0];
|
||||||
for(size_t i = 1; i < dq.size(); i++) {
|
for(size_t i = 1; i < dq.size(); i++) {
|
||||||
os << "\", \"" << dq[i];
|
os<<"\", \""<<dq[i];
|
||||||
}
|
}
|
||||||
os << "\"]";
|
os<<"\"]";
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class T1, class T2>
|
template<class T1, class T2>
|
||||||
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
|
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
|
||||||
os << pr.first << ":" << pr.second ;
|
os << pr.first << ":" << pr.second ;
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
string& operator << (string& str, const T& obj) {
|
string& operator << (string& str, const T& obj) {
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << obj; // call ostream& operator << (ostream& os,
|
ss << obj; // call ostream& operator << (ostream& os,
|
||||||
return str = ss.str();
|
return str = ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T1, class T2>
|
template<class T1, class T2>
|
||||||
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
|
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
|
||||||
if(mp.empty()) {
|
if(mp.empty()) {
|
||||||
os << "{}";
|
os<<"{}";
|
||||||
return os;
|
|
||||||
}
|
|
||||||
os << '{';
|
|
||||||
typename map<T1, T2>::const_iterator it = mp.begin();
|
|
||||||
os << *it;
|
|
||||||
it++;
|
|
||||||
while(it != mp.end()) {
|
|
||||||
os << ", " << *it;
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
os << '}';
|
|
||||||
return os;
|
return os;
|
||||||
|
}
|
||||||
|
os<<'{';
|
||||||
|
typename map<T1, T2>::const_iterator it = mp.begin();
|
||||||
|
os<<*it;
|
||||||
|
it++;
|
||||||
|
while(it != mp.end()) {
|
||||||
|
os<<", "<<*it;
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
os<<'}';
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
template<class T1, class T2>
|
template<class T1, class T2>
|
||||||
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
|
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
|
||||||
if(mp.empty()) {
|
if(mp.empty()) {
|
||||||
return os << "{}";
|
return os << "{}";
|
||||||
}
|
}
|
||||||
os << '{';
|
os<<'{';
|
||||||
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
|
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
|
||||||
os << *it;
|
os<<*it;
|
||||||
it++;
|
it++;
|
||||||
while(it != mp.end()) {
|
while(it != mp.end()) {
|
||||||
os << ", " << *it++;
|
os<<", "<<*it++;
|
||||||
}
|
}
|
||||||
return os << '}';
|
return os<<'}';
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
ostream& operator << (ostream& os, const set<T>& st) {
|
ostream& operator << (ostream& os, const set<T>& st) {
|
||||||
if(st.empty()) {
|
if(st.empty()) {
|
||||||
os << "{}";
|
os << "{}";
|
||||||
return os;
|
|
||||||
}
|
|
||||||
os << '{';
|
|
||||||
typename set<T>::const_iterator it = st.begin();
|
|
||||||
os << *it;
|
|
||||||
it++;
|
|
||||||
while(it != st.end()) {
|
|
||||||
os << ", " << *it;
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
os << '}';
|
|
||||||
return os;
|
return os;
|
||||||
|
}
|
||||||
|
os<<'{';
|
||||||
|
typename set<T>::const_iterator it = st.begin();
|
||||||
|
os<<*it;
|
||||||
|
it++;
|
||||||
|
while(it != st.end()) {
|
||||||
|
os<<", "<<*it;
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
os<<'}';
|
||||||
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class KeyType, class ContainType>
|
template<class KeyType, class ContainType>
|
||||||
bool IsIn(const ContainType& contain, const KeyType& key) {
|
bool IsIn(const ContainType& contain, const KeyType& key) {
|
||||||
return contain.end() != contain.find(key);
|
return contain.end() != contain.find(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
|
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
|
||||||
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
|
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
|
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
|
||||||
ostreambuf_iterator<T> itr(ofs);
|
ostreambuf_iterator<T> itr (ofs);
|
||||||
copy(s.begin(), s.end(), itr);
|
copy(s.begin(), s.end(), itr);
|
||||||
return ofs;
|
return ofs;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace std
|
} // namespace std
|
||||||
|
|
|
@ -1,27 +1,14 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
/************************************
|
/************************************
|
||||||
* file enc : ascii
|
* file enc : ascii
|
||||||
* author : wuyanyi09@gmail.com
|
* author : wuyanyi09@gmail.com
|
||||||
************************************/
|
************************************/
|
||||||
#ifndef LIMONP_STR_FUNCTS_H
|
#ifndef LIMONP_STR_FUNCTS_H
|
||||||
#define LIMONP_STR_FUNCTS_H
|
#define LIMONP_STR_FUNCTS_H
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include <sys/types.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -29,14 +16,9 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
#include <memory.h>
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <sys/types.h>
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "StdExtension.hpp"
|
#include "StdExtension.hpp"
|
||||||
|
@ -44,339 +26,356 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
inline string StringFormat(const char* fmt, ...) {
|
inline string StringFormat(const char* fmt, ...) {
|
||||||
int size = 256;
|
int size = 256;
|
||||||
std::string str;
|
std::string str;
|
||||||
va_list ap;
|
va_list ap;
|
||||||
while(1) {
|
while (1) {
|
||||||
str.resize(size);
|
str.resize(size);
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
|
int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
if(n > -1 && n < size) {
|
if (n > -1 && n < size) {
|
||||||
str.resize(n);
|
str.resize(n);
|
||||||
return str;
|
return str;
|
||||||
}
|
|
||||||
if(n > -1)
|
|
||||||
size = n + 1;
|
|
||||||
else
|
|
||||||
size *= 2;
|
|
||||||
}
|
}
|
||||||
return str;
|
if (n > -1)
|
||||||
|
size = n + 1;
|
||||||
|
else
|
||||||
|
size *= 2;
|
||||||
|
}
|
||||||
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void Join(T begin, T end, string& res, const string& connector) {
|
void Join(T begin, T end, string& res, const string& connector) {
|
||||||
if(begin == end) {
|
if(begin == end) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << *begin;
|
ss<<*begin;
|
||||||
begin++;
|
begin++;
|
||||||
while(begin != end) {
|
while(begin != end) {
|
||||||
ss << connector << *begin;
|
ss << connector << *begin;
|
||||||
begin ++;
|
begin ++;
|
||||||
}
|
}
|
||||||
res = ss.str();
|
res = ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
string Join(T begin, T end, const string& connector) {
|
string Join(T begin, T end, const string& connector) {
|
||||||
string res;
|
string res;
|
||||||
Join(begin, end, res, connector);
|
Join(begin ,end, res, connector);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline string& Upper(string& str) {
|
inline string& Upper(string& str) {
|
||||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
|
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline string& Lower(string& str) {
|
inline string& Lower(string& str) {
|
||||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
|
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsSpace(unsigned c) {
|
inline bool IsSpace(unsigned c) {
|
||||||
// when passing large int as the argument of isspace, it core dump, so here need a type cast.
|
// when passing large int as the argument of isspace, it core dump, so here need a type cast.
|
||||||
return c > 0xff ? false : std::isspace(c & 0xff) != 0;
|
return c > 0xff ? false : std::isspace(c & 0xff);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& LTrim(std::string &s) {
|
inline std::string& LTrim(std::string &s) {
|
||||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
|
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& RTrim(std::string &s) {
|
inline std::string& RTrim(std::string &s) {
|
||||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
|
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& Trim(std::string &s) {
|
inline std::string& Trim(std::string &s) {
|
||||||
return LTrim(RTrim(s));
|
return LTrim(RTrim(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& LTrim(std::string & s, char x) {
|
inline std::string& LTrim(std::string & s, char x) {
|
||||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
|
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& RTrim(std::string & s, char x) {
|
inline std::string& RTrim(std::string & s, char x) {
|
||||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
|
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string& Trim(std::string &s, char x) {
|
inline std::string& Trim(std::string &s, char x) {
|
||||||
return LTrim(RTrim(s, x), x);
|
return LTrim(RTrim(s, x), x);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
|
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
|
||||||
res.clear();
|
res.clear();
|
||||||
size_t Start = 0;
|
size_t Start = 0;
|
||||||
size_t end = 0;
|
size_t end = 0;
|
||||||
string sub;
|
string sub;
|
||||||
while(Start < src.size()) {
|
while(Start < src.size()) {
|
||||||
end = src.find_first_of(pattern, Start);
|
end = src.find_first_of(pattern, Start);
|
||||||
if(string::npos == end || res.size() >= maxsplit) {
|
if(string::npos == end || res.size() >= maxsplit) {
|
||||||
sub = src.substr(Start);
|
sub = src.substr(Start);
|
||||||
res.push_back(sub);
|
res.push_back(sub);
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
sub = src.substr(Start, end - Start);
|
|
||||||
res.push_back(sub);
|
|
||||||
Start = end + 1;
|
|
||||||
}
|
}
|
||||||
return;
|
sub = src.substr(Start, end - Start);
|
||||||
|
res.push_back(sub);
|
||||||
|
Start = end + 1;
|
||||||
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
||||||
vector<string> res;
|
vector<string> res;
|
||||||
Split(src, res, pattern, maxsplit);
|
Split(src, res, pattern, maxsplit);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool StartsWith(const string& str, const string& prefix) {
|
inline bool StartsWith(const string& str, const string& prefix) {
|
||||||
if(prefix.length() > str.length()) {
|
if(prefix.length() > str.length()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return 0 == str.compare(0, prefix.length(), prefix);
|
return 0 == str.compare(0, prefix.length(), prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool EndsWith(const string& str, const string& suffix) {
|
inline bool EndsWith(const string& str, const string& suffix) {
|
||||||
if(suffix.length() > str.length()) {
|
if(suffix.length() > str.length()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
|
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsInStr(const string& str, char ch) {
|
inline bool IsInStr(const string& str, char ch) {
|
||||||
return str.find(ch) != string::npos;
|
return str.find(ch) != string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline uint16_t TwocharToUint16(char high, char low) {
|
inline uint16_t TwocharToUint16(char high, char low) {
|
||||||
return (((uint16_t(high) & 0x00ff) << 8) | (uint16_t(low) & 0x00ff));
|
return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint16Container>
|
template <class Uint16Container>
|
||||||
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
|
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
|
||||||
if(!str) {
|
if(!str) {
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
char ch1, ch2;
|
||||||
|
uint16_t tmp;
|
||||||
|
vec.clear();
|
||||||
|
for(size_t i = 0; i < len;) {
|
||||||
|
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||||
|
vec.push_back(str[i]);
|
||||||
|
i++;
|
||||||
|
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
|
||||||
|
ch1 = (str[i] >> 2) & 0x07;
|
||||||
|
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
||||||
|
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||||
|
vec.push_back(tmp);
|
||||||
|
i += 2;
|
||||||
|
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
|
||||||
|
ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
|
||||||
|
ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
||||||
|
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||||
|
vec.push_back(tmp);
|
||||||
|
i += 3;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
char ch1, ch2;
|
}
|
||||||
uint16_t tmp;
|
return true;
|
||||||
vec.clear();
|
|
||||||
for(size_t i = 0; i < len;) {
|
|
||||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
|
||||||
vec.push_back(str[i]);
|
|
||||||
i++;
|
|
||||||
} else if((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
|
|
||||||
ch1 = (str[i] >> 2) & 0x07;
|
|
||||||
ch2 = (str[i + 1] & 0x3f) | ((str[i] & 0x03) << 6);
|
|
||||||
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
|
|
||||||
vec.push_back(tmp);
|
|
||||||
i += 2;
|
|
||||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
|
|
||||||
ch1 = ((uint8_t)str[i] << 4) | ((str[i + 1] >> 2) & 0x0f);
|
|
||||||
ch2 = (((uint8_t)str[i + 1] << 6) & 0xc0) | (str[i + 2] & 0x3f);
|
|
||||||
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
|
|
||||||
vec.push_back(tmp);
|
|
||||||
i += 3;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint16Container>
|
template <class Uint16Container>
|
||||||
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
|
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
|
||||||
return Utf8ToUnicode(str.c_str(), str.size(), vec);
|
return Utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Uint32Container>
|
||||||
|
bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
|
||||||
|
uint32_t tmp;
|
||||||
|
vec.clear();
|
||||||
|
for(size_t i = 0; i < size;) {
|
||||||
|
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||||
|
// 7bit, total 7bit
|
||||||
|
tmp = (uint8_t)(str[i]) & 0x7f;
|
||||||
|
i++;
|
||||||
|
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxxx
|
||||||
|
// 5bit, total 5bit
|
||||||
|
tmp = (uint8_t)(str[i]) & 0x1f;
|
||||||
|
|
||||||
|
// 6bit, total 11bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||||
|
i += 2;
|
||||||
|
} else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxxxx
|
||||||
|
// 4bit, total 4bit
|
||||||
|
tmp = (uint8_t)(str[i]) & 0x0f;
|
||||||
|
|
||||||
|
// 6bit, total 10bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||||
|
|
||||||
|
// 6bit, total 16bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||||
|
|
||||||
|
i += 3;
|
||||||
|
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 11110xxxx
|
||||||
|
// 3bit, total 3bit
|
||||||
|
tmp = (uint8_t)(str[i]) & 0x07;
|
||||||
|
|
||||||
|
// 6bit, total 9bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||||
|
|
||||||
|
// 6bit, total 15bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||||
|
|
||||||
|
// 6bit, total 21bit
|
||||||
|
tmp <<= 6;
|
||||||
|
tmp |= (uint8_t)(str[i+3]) & 0x3f;
|
||||||
|
|
||||||
|
i += 4;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
vec.push_back(tmp);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint32Container>
|
template <class Uint32Container>
|
||||||
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
|
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
|
||||||
uint32_t tmp;
|
return Utf8ToUnicode32(str.data(), str.size(), vec);
|
||||||
vec.clear();
|
}
|
||||||
for(size_t i = 0; i < str.size();) {
|
|
||||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
|
||||||
// 7bit, total 7bit
|
|
||||||
tmp = (uint8_t)(str[i]) & 0x7f;
|
|
||||||
i++;
|
|
||||||
} else if((uint8_t)str[i] <= 0xdf && i + 1 < str.size()) { // 110xxxxxx
|
|
||||||
// 5bit, total 5bit
|
|
||||||
tmp = (uint8_t)(str[i]) & 0x1f;
|
|
||||||
|
|
||||||
// 6bit, total 11bit
|
inline int UnicodeToUtf8Bytes(uint32_t ui){
|
||||||
tmp <<= 6;
|
if(ui <= 0x7f) {
|
||||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
return 1;
|
||||||
i += 2;
|
} else if(ui <= 0x7ff) {
|
||||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < str.size()) { // 1110xxxxxx
|
return 2;
|
||||||
// 4bit, total 4bit
|
} else if(ui <= 0xffff) {
|
||||||
tmp = (uint8_t)(str[i]) & 0x0f;
|
return 3;
|
||||||
|
} else {
|
||||||
// 6bit, total 10bit
|
return 4;
|
||||||
tmp <<= 6;
|
|
||||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
|
||||||
|
|
||||||
// 6bit, total 16bit
|
|
||||||
tmp <<= 6;
|
|
||||||
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
|
|
||||||
|
|
||||||
i += 3;
|
|
||||||
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < str.size()) { // 11110xxxx
|
|
||||||
// 3bit, total 3bit
|
|
||||||
tmp = (uint8_t)(str[i]) & 0x07;
|
|
||||||
|
|
||||||
// 6bit, total 9bit
|
|
||||||
tmp <<= 6;
|
|
||||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
|
||||||
|
|
||||||
// 6bit, total 15bit
|
|
||||||
tmp <<= 6;
|
|
||||||
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
|
|
||||||
|
|
||||||
// 6bit, total 21bit
|
|
||||||
tmp <<= 6;
|
|
||||||
tmp |= (uint8_t)(str[i + 3]) & 0x3f;
|
|
||||||
|
|
||||||
i += 4;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
vec.push_back(tmp);
|
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint32ContainerConIter>
|
template <class Uint32ContainerConIter>
|
||||||
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
|
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
|
||||||
res.clear();
|
res.clear();
|
||||||
uint32_t ui;
|
uint32_t ui;
|
||||||
while(begin != end) {
|
while(begin != end) {
|
||||||
ui = *begin;
|
ui = *begin;
|
||||||
if(ui <= 0x7f) {
|
if(ui <= 0x7f) {
|
||||||
res += char(ui);
|
res += char(ui);
|
||||||
} else if(ui <= 0x7ff) {
|
} else if(ui <= 0x7ff) {
|
||||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
||||||
res += char((ui & 0x3f) | 0x80);
|
res += char((ui & 0x3f) | 0x80);
|
||||||
} else if(ui <= 0xffff) {
|
} else if(ui <= 0xffff) {
|
||||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
||||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||||
res += char((ui & 0x3f) | 0x80);
|
res += char((ui & 0x3f) | 0x80);
|
||||||
} else {
|
} else {
|
||||||
res += char(((ui >> 18) & 0x03) | 0xf0);
|
res += char(((ui >> 18) & 0x03) | 0xf0);
|
||||||
res += char(((ui >> 12) & 0x3f) | 0x80);
|
res += char(((ui >> 12) & 0x3f) | 0x80);
|
||||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||||
res += char((ui & 0x3f) | 0x80);
|
res += char((ui & 0x3f) | 0x80);
|
||||||
}
|
|
||||||
begin ++;
|
|
||||||
}
|
}
|
||||||
|
begin ++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint16ContainerConIter>
|
template <class Uint16ContainerConIter>
|
||||||
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
||||||
res.clear();
|
res.clear();
|
||||||
uint16_t ui;
|
uint16_t ui;
|
||||||
while(begin != end) {
|
while(begin != end) {
|
||||||
ui = *begin;
|
ui = *begin;
|
||||||
if(ui <= 0x7f) {
|
if(ui <= 0x7f) {
|
||||||
res += char(ui);
|
res += char(ui);
|
||||||
} else if(ui <= 0x7ff) {
|
} else if(ui <= 0x7ff) {
|
||||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
res += char(((ui>>6) & 0x1f) | 0xc0);
|
||||||
res += char((ui & 0x3f) | 0x80);
|
res += char((ui & 0x3f) | 0x80);
|
||||||
} else {
|
} else {
|
||||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
res += char(((ui >> 12) & 0x0f )| 0xe0);
|
||||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
res += char(((ui>>6) & 0x3f )| 0x80 );
|
||||||
res += char((ui & 0x3f) | 0x80);
|
res += char((ui & 0x3f) | 0x80);
|
||||||
}
|
|
||||||
begin ++;
|
|
||||||
}
|
}
|
||||||
|
begin ++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class Uint16Container>
|
template <class Uint16Container>
|
||||||
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
|
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
|
||||||
vec.clear();
|
vec.clear();
|
||||||
if(!str) {
|
if(!str) {
|
||||||
return true;
|
|
||||||
}
|
|
||||||
size_t i = 0;
|
|
||||||
while(i < len) {
|
|
||||||
if(0 == (str[i] & 0x80)) {
|
|
||||||
vec.push_back(uint16_t(str[i]));
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
if(i + 1 < len) { //&& (str[i+1] & 0x80))
|
|
||||||
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff) << 8) | (uint16_t(str[i + 1]) & 0x00ff));
|
|
||||||
vec.push_back(tmp);
|
|
||||||
i += 2;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
size_t i = 0;
|
||||||
|
while(i < len) {
|
||||||
|
if(0 == (str[i] & 0x80)) {
|
||||||
|
vec.push_back(uint16_t(str[i]));
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
if(i + 1 < len) { //&& (str[i+1] & 0x80))
|
||||||
|
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
|
||||||
|
vec.push_back(tmp);
|
||||||
|
i += 2;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint16Container>
|
template <class Uint16Container>
|
||||||
bool GBKTrans(const string& str, Uint16Container& vec) {
|
bool GBKTrans(const string& str, Uint16Container& vec) {
|
||||||
return GBKTrans(str.c_str(), str.size(), vec);
|
return GBKTrans(str.c_str(), str.size(), vec);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Uint16ContainerConIter>
|
template <class Uint16ContainerConIter>
|
||||||
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
||||||
res.clear();
|
res.clear();
|
||||||
//pair<char, char> pa;
|
//pair<char, char> pa;
|
||||||
char first, second;
|
char first, second;
|
||||||
while(begin != end) {
|
while(begin != end) {
|
||||||
//pa = uint16ToChar2(*begin);
|
//pa = uint16ToChar2(*begin);
|
||||||
first = ((*begin) >> 8) & 0x00ff;
|
first = ((*begin)>>8) & 0x00ff;
|
||||||
second = (*begin) & 0x00ff;
|
second = (*begin) & 0x00ff;
|
||||||
if(first & 0x80) {
|
if(first & 0x80) {
|
||||||
res += first;
|
res += first;
|
||||||
res += second;
|
res += second;
|
||||||
} else {
|
} else {
|
||||||
res += second;
|
res += second;
|
||||||
}
|
|
||||||
begin++;
|
|
||||||
}
|
}
|
||||||
|
begin++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* format example: "%Y-%m-%d %H:%M:%S"
|
* format example: "%Y-%m-%d %H:%M:%S"
|
||||||
*/
|
*/
|
||||||
inline void GetTime(const string& format, string& timeStr) {
|
// inline void GetTime(const string& format, string& timeStr) {
|
||||||
time_t timeNow;
|
// time_t timeNow;
|
||||||
time(&timeNow);
|
// time(&timeNow);
|
||||||
timeStr.resize(64);
|
// timeStr.resize(64);
|
||||||
size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
|
// size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
|
||||||
timeStr.resize(len);
|
// timeStr.resize(len);
|
||||||
}
|
// }
|
||||||
|
|
||||||
inline string PathJoin(const string& path1, const string& path2) {
|
inline string PathJoin(const string& path1, const string& path2) {
|
||||||
if(EndsWith(path1, "/")) {
|
if(EndsWith(path1, "/")) {
|
||||||
return path1 + path2;
|
return path1 + path2;
|
||||||
}
|
}
|
||||||
return path1 + "/" + path2;
|
return path1 + "/" + path2;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_THREAD_HPP
|
#ifndef LIMONP_THREAD_HPP
|
||||||
#define LIMONP_THREAD_HPP
|
#define LIMONP_THREAD_HPP
|
||||||
|
|
||||||
|
@ -25,36 +7,36 @@
|
||||||
namespace limonp {
|
namespace limonp {
|
||||||
|
|
||||||
class IThread: NonCopyable {
|
class IThread: NonCopyable {
|
||||||
public:
|
public:
|
||||||
IThread(): isStarted(false), isJoined(false) {
|
IThread(): isStarted(false), isJoined(false) {
|
||||||
|
}
|
||||||
|
virtual ~IThread() {
|
||||||
|
if(isStarted && !isJoined) {
|
||||||
|
XCHECK(!pthread_detach(thread_));
|
||||||
}
|
}
|
||||||
virtual ~IThread() {
|
};
|
||||||
if(isStarted && !isJoined) {
|
|
||||||
XCHECK(!pthread_detach(thread_));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
virtual void Run() = 0;
|
virtual void Run() = 0;
|
||||||
void Start() {
|
void Start() {
|
||||||
XCHECK(!isStarted);
|
XCHECK(!isStarted);
|
||||||
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
||||||
isStarted = true;
|
isStarted = true;
|
||||||
}
|
}
|
||||||
void Join() {
|
void Join() {
|
||||||
XCHECK(!isJoined);
|
XCHECK(!isJoined);
|
||||||
XCHECK(!pthread_join(thread_, NULL));
|
XCHECK(!pthread_join(thread_, NULL));
|
||||||
isJoined = true;
|
isJoined = true;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
static void * Worker(void * data) {
|
static void * Worker(void * data) {
|
||||||
IThread * ptr = (IThread*) data;
|
IThread * ptr = (IThread* ) data;
|
||||||
ptr->Run();
|
ptr->Run();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_t thread_;
|
pthread_t thread_;
|
||||||
bool isStarted;
|
bool isStarted;
|
||||||
bool isJoined;
|
bool isJoined;
|
||||||
}; // class IThread
|
}; // class IThread
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -1,21 +1,3 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#ifndef LIMONP_THREAD_POOL_HPP
|
#ifndef LIMONP_THREAD_POOL_HPP
|
||||||
#define LIMONP_THREAD_POOL_HPP
|
#define LIMONP_THREAD_POOL_HPP
|
||||||
|
|
||||||
|
@ -30,73 +12,73 @@ using namespace std;
|
||||||
|
|
||||||
//class ThreadPool;
|
//class ThreadPool;
|
||||||
class ThreadPool: NonCopyable {
|
class ThreadPool: NonCopyable {
|
||||||
public:
|
public:
|
||||||
class Worker: public IThread {
|
class Worker: public IThread {
|
||||||
public:
|
public:
|
||||||
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
||||||
assert(ptThreadPool_);
|
assert(ptThreadPool_);
|
||||||
}
|
|
||||||
virtual ~Worker() {
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void Run() {
|
|
||||||
while(true) {
|
|
||||||
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
|
||||||
if(closure == NULL) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
closure->Run();
|
|
||||||
} catch(std::exception& e) {
|
|
||||||
XLOG(ERROR) << e.what();
|
|
||||||
} catch(...) {
|
|
||||||
XLOG(ERROR) << " unknown exception.";
|
|
||||||
}
|
|
||||||
delete closure;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
ThreadPool * ptThreadPool_;
|
|
||||||
}; // class Worker
|
|
||||||
|
|
||||||
ThreadPool(size_t thread_num)
|
|
||||||
: threads_(thread_num),
|
|
||||||
queue_(thread_num) {
|
|
||||||
assert(thread_num);
|
|
||||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
|
||||||
threads_[i] = new Worker(this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
~ThreadPool() {
|
virtual ~Worker() {
|
||||||
Stop();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Start() {
|
virtual void Run() {
|
||||||
for(size_t i = 0; i < threads_.size(); i++) {
|
while (true) {
|
||||||
threads_[i]->Start();
|
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
||||||
|
if (closure == NULL) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
try {
|
||||||
void Stop() {
|
closure->Run();
|
||||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
} catch(std::exception& e) {
|
||||||
queue_.Push(NULL);
|
XLOG(ERROR) << e.what();
|
||||||
|
} catch(...) {
|
||||||
|
XLOG(ERROR) << " unknown exception.";
|
||||||
}
|
}
|
||||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
delete closure;
|
||||||
threads_[i]->Join();
|
}
|
||||||
delete threads_[i];
|
|
||||||
}
|
|
||||||
threads_.clear();
|
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
ThreadPool * ptThreadPool_;
|
||||||
|
}; // class Worker
|
||||||
|
|
||||||
void Add(ClosureInterface* task) {
|
ThreadPool(size_t thread_num)
|
||||||
assert(task);
|
: threads_(thread_num),
|
||||||
queue_.Push(task);
|
queue_(thread_num) {
|
||||||
|
assert(thread_num);
|
||||||
|
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||||
|
threads_[i] = new Worker(this);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
~ThreadPool() {
|
||||||
|
Stop();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
void Start() {
|
||||||
friend class Worker;
|
for(size_t i = 0; i < threads_.size(); i++) {
|
||||||
|
threads_[i]->Start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void Stop() {
|
||||||
|
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||||
|
queue_.Push(NULL);
|
||||||
|
}
|
||||||
|
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||||
|
threads_[i]->Join();
|
||||||
|
delete threads_[i];
|
||||||
|
}
|
||||||
|
threads_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
vector<IThread*> threads_;
|
void Add(ClosureInterface* task) {
|
||||||
BoundedBlockingQueue<ClosureInterface*> queue_;
|
assert(task);
|
||||||
|
queue_.Push(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class Worker;
|
||||||
|
|
||||||
|
vector<IThread*> threads_;
|
||||||
|
BoundedBlockingQueue<ClosureInterface*> queue_;
|
||||||
}; // class ThreadPool
|
}; // class ThreadPool
|
||||||
|
|
||||||
} // namespace limonp
|
} // namespace limonp
|
||||||
|
|
|
@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
||||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||||
include(cppjieba/cppjieba.pri)
|
include(cppjieba/cppjieba.pri)
|
||||||
|
|
||||||
|
#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
chinese-segmentation.cpp \
|
chinese-segmentation.cpp \
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,7 @@ AppMatch::AppMatch(QObject *parent) : QThread(parent)
|
||||||
if(!m_interFace->isValid()) {
|
if(!m_interFace->isValid()) {
|
||||||
qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
|
qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
|
||||||
}
|
}
|
||||||
|
m_interFace->setTimeout(200);
|
||||||
qDebug() << "AppMatch is new";
|
qDebug() << "AppMatch is new";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,10 +228,11 @@ void AppMatch::getDesktopFilePath() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
|
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
|
||||||
QMap<NameString, QStringList>::const_iterator i;
|
// QMap<NameString, QStringList>::const_iterator i;
|
||||||
for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
|
// for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
|
||||||
appNameMatch(i.key().app_name, installed);
|
// appNameMatch(i.key().app_name, installed);
|
||||||
}
|
// }
|
||||||
|
appNameMatch(installed);
|
||||||
qDebug() << "installed app match is successful!";
|
qDebug() << "installed app match is successful!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -275,12 +277,44 @@ void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &inst
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void AppMatch::appNameMatch(QMap<NameString, QStringList> &installed) {
|
||||||
|
QStringList list;
|
||||||
|
NameString name;
|
||||||
|
QMapIterator<NameString, QStringList> iter(m_installAppMap);
|
||||||
|
while(iter.hasNext()) {
|
||||||
|
iter.next();
|
||||||
|
list = iter.value();
|
||||||
|
name.app_name = iter.key().app_name;
|
||||||
|
if(iter.key().app_name.contains(m_sourceText, Qt::CaseInsensitive)) {
|
||||||
|
installed.insert(name, list);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
QStringList pinyinlist;
|
||||||
|
pinyinlist = FileUtils::findMultiToneWords(iter.key().app_name);
|
||||||
|
|
||||||
|
for(int i = 0; i < pinyinlist.size() / 2; i++) {
|
||||||
|
QString shouzimu = pinyinlist.at(2 * i + 1); // 中文转首字母
|
||||||
|
if(shouzimu.contains(m_sourceText, Qt::CaseInsensitive)) {
|
||||||
|
installed.insert(name, list);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(m_sourceText.size() < 2)
|
||||||
|
break;
|
||||||
|
QString pinyin = pinyinlist.at(2 * i); // 中文转拼音
|
||||||
|
if(pinyin.contains(m_sourceText, Qt::CaseInsensitive)) {
|
||||||
|
installed.insert(name, list);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
|
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
|
||||||
if(m_interFace->timeout() != -1) {
|
// if(m_interFace->timeout() != -1) {
|
||||||
qWarning() << "softWareCente Dbus is timeout !";
|
// qWarning() << "softWareCente Dbus is timeout !";
|
||||||
return;
|
// return;
|
||||||
}
|
// }
|
||||||
slotDBusCallFinished(softwarereturn);
|
slotDBusCallFinished(softwarereturn);
|
||||||
qDebug() << "softWareCenter match app is successful!";
|
qDebug() << "softWareCenter match app is successful!";
|
||||||
}
|
}
|
||||||
|
@ -349,7 +383,7 @@ void AppMatch::run() {
|
||||||
QDir androidPath(QDir::homePath() + "/.local/share/applications/");
|
QDir androidPath(QDir::homePath() + "/.local/share/applications/");
|
||||||
if(androidPath.exists())
|
if(androidPath.exists())
|
||||||
this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/");
|
this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/");
|
||||||
connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) {
|
connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) {
|
||||||
this->getDesktopFilePath();
|
this->getDesktopFilePath();
|
||||||
if(path == "/usr/share/applications/") {
|
if(path == "/usr/share/applications/") {
|
||||||
this->getAllDesktopFilePath("/usr/share/applications/");
|
this->getAllDesktopFilePath("/usr/share/applications/");
|
||||||
|
|
|
@ -65,6 +65,7 @@ private:
|
||||||
void getAppName(QMap<NameString, QStringList> &installed);
|
void getAppName(QMap<NameString, QStringList> &installed);
|
||||||
// void appNameMatch(QString appname,QString desktoppath,QString appicon);
|
// void appNameMatch(QString appname,QString desktoppath,QString appicon);
|
||||||
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
|
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
|
||||||
|
void appNameMatch(QMap<NameString, QStringList> &installed);
|
||||||
|
|
||||||
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);
|
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,219 @@
|
||||||
|
#include "app-search-plugin.h"
|
||||||
|
#include <gio/gdesktopappinfo.h>
|
||||||
|
#include <QWidget>
|
||||||
|
#include <QLabel>
|
||||||
|
using namespace Zeeker;
|
||||||
|
size_t AppSearchPlugin::uniqueSymbol = 0;
|
||||||
|
QMutex AppSearchPlugin::m_mutex;
|
||||||
|
AppSearchPlugin::AppSearchPlugin(QObject *parent) : QObject(parent)
|
||||||
|
{
|
||||||
|
SearchPluginIface::Actioninfo open { 0, tr("Open")};
|
||||||
|
SearchPluginIface::Actioninfo addtoDesktop { 1, tr("Add Shortcut to Desktop")};
|
||||||
|
SearchPluginIface::Actioninfo addtoPanel { 2, tr("Add Shortcut to Panel")};
|
||||||
|
SearchPluginIface::Actioninfo install { 0, tr("Install")};
|
||||||
|
m_actionInfo_installed << open << addtoDesktop << addtoPanel;
|
||||||
|
m_actionInfo_not_installed << install;
|
||||||
|
AppMatch::getAppMatch()->start();
|
||||||
|
m_pool.setMaxThreadCount(2);
|
||||||
|
m_pool.setExpiryTimeout(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
const QString AppSearchPlugin::name()
|
||||||
|
{
|
||||||
|
return tr("Applications Search");
|
||||||
|
}
|
||||||
|
|
||||||
|
const QString AppSearchPlugin::description()
|
||||||
|
{
|
||||||
|
return tr("Applications Search");
|
||||||
|
}
|
||||||
|
|
||||||
|
QString AppSearchPlugin::getPluginName()
|
||||||
|
{
|
||||||
|
return tr("Applications Search");
|
||||||
|
}
|
||||||
|
|
||||||
|
void AppSearchPlugin::KeywordSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult)
|
||||||
|
{
|
||||||
|
m_mutex.lock();
|
||||||
|
++uniqueSymbol;
|
||||||
|
m_mutex.unlock();
|
||||||
|
AppSearch *appsearch = new AppSearch(searchResult, keyword, uniqueSymbol);
|
||||||
|
m_pool.start(appsearch);
|
||||||
|
}
|
||||||
|
|
||||||
|
QList<SearchPluginIface::Actioninfo> AppSearchPlugin::getActioninfo(int type)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case 0:
|
||||||
|
return m_actionInfo_installed;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
return m_actionInfo_not_installed;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return QList<SearchPluginIface::Actioninfo>();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AppSearchPlugin::openAction(int actionkey, QString key, int type)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case 0:
|
||||||
|
switch (actionkey) {
|
||||||
|
case 0:
|
||||||
|
if(!launch(key)) {
|
||||||
|
qWarning() << "Fail to launch:" << key;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
if(!addDesktopShortcut(key)) {
|
||||||
|
qWarning() << "Fail to add Desktop Shortcut:" << key;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
if(!addPanelShortcut(key)) {
|
||||||
|
qWarning() << "Fail to add Panel Shortcut:" << key;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
if(!installAppAction(key)) {
|
||||||
|
qWarning() << "Fail to install:" << key;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AppSearchPlugin::isPreviewEnable(QString key, int type)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
QWidget *AppSearchPlugin::previewPage(QString key, int type, QWidget *parent = nullptr)
|
||||||
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AppSearchPlugin::launch(const QString &path)
|
||||||
|
{
|
||||||
|
GDesktopAppInfo * desktopAppInfo = g_desktop_app_info_new_from_filename(path.toLocal8Bit().data());
|
||||||
|
bool res = static_cast<bool>(g_app_info_launch(G_APP_INFO(desktopAppInfo), nullptr, nullptr, nullptr));
|
||||||
|
g_object_unref(desktopAppInfo);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
bool AppSearchPlugin::addPanelShortcut(const QString& path) {
|
||||||
|
QDBusInterface iface("com.ukui.panel.desktop",
|
||||||
|
"/",
|
||||||
|
"com.ukui.panel.desktop",
|
||||||
|
QDBusConnection::sessionBus());
|
||||||
|
if(iface.isValid()) {
|
||||||
|
QDBusReply<bool> isExist = iface.call("CheckIfExist", path);
|
||||||
|
if(isExist) {
|
||||||
|
qWarning() << "Add shortcut to panel failed, because it is already existed!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
QDBusReply<QVariant> ret = iface.call("AddToTaskbar", path);
|
||||||
|
qDebug() << "Add shortcut to panel successed!";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AppSearchPlugin::addDesktopShortcut(const QString& path) {
|
||||||
|
QString dirpath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation);
|
||||||
|
QFileInfo fileInfo(path);
|
||||||
|
QString desktopfn = fileInfo.fileName();
|
||||||
|
QFile file(path);
|
||||||
|
QString newName = QString(dirpath + "/" + desktopfn);
|
||||||
|
bool ret = file.copy(QString(dirpath + "/" + desktopfn));
|
||||||
|
if(ret) {
|
||||||
|
QProcess process;
|
||||||
|
process.startDetached(QString("chmod a+x %1").arg(newName));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AppSearchPlugin::installAppAction(const QString & name) {
|
||||||
|
QDBusInterface * interface = new QDBusInterface("com.kylin.softwarecenter",
|
||||||
|
"/com/kylin/softwarecenter",
|
||||||
|
"com.kylin.utiliface",
|
||||||
|
QDBusConnection::sessionBus());
|
||||||
|
|
||||||
|
if(interface->isValid()) {
|
||||||
|
//软件商店已打开,直接跳转
|
||||||
|
interface->call("show_search_result", name);
|
||||||
|
bool reply = QDBusReply<bool>(interface->call(QString("show_search_result"), name));
|
||||||
|
return reply;
|
||||||
|
} else {
|
||||||
|
//软件商店未打开,打开软件商店下载此软件
|
||||||
|
qDebug() << "Softwarecenter has not been launched, now launch it." << name;
|
||||||
|
QProcess process;
|
||||||
|
return process.startDetached(QString("kylin-software-center -find %1").arg(name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AppSearch::AppSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, const QString &keyword, size_t uniqueSymbol)
|
||||||
|
{
|
||||||
|
this->setAutoDelete(true);
|
||||||
|
m_search_result = searchResult;
|
||||||
|
m_keyword = keyword;
|
||||||
|
m_uniqueSymbol = uniqueSymbol;
|
||||||
|
}
|
||||||
|
|
||||||
|
AppSearch::~AppSearch()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void AppSearch::run()
|
||||||
|
{
|
||||||
|
//These weird code is mean to be compatible with the old version UI.
|
||||||
|
AppMatch::getAppMatch()->startMatchApp(m_keyword, m_installed_apps, m_not_installed_apps);
|
||||||
|
QMapIterator<NameString, QStringList> i(m_installed_apps);
|
||||||
|
while (i.hasNext()) {
|
||||||
|
i.next();
|
||||||
|
SearchPluginIface::ResultInfo ri;
|
||||||
|
if(!QIcon::fromTheme(i.value().at(1)).isNull()) {
|
||||||
|
ri.icon = QIcon::fromTheme(i.value().at(1));
|
||||||
|
}else {
|
||||||
|
ri.icon = QIcon(":/res/icons/desktop.png");
|
||||||
|
}
|
||||||
|
ri.name = i.key().app_name;
|
||||||
|
ri.actionKey = i.value().at(0);
|
||||||
|
ri.type = 0; //0 means installed apps.
|
||||||
|
if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) {
|
||||||
|
m_search_result->enqueue(ri);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
QMapIterator<NameString, QStringList> in(m_not_installed_apps);
|
||||||
|
while (in.hasNext()) {
|
||||||
|
in.next();
|
||||||
|
SearchPluginIface::ResultInfo ri;
|
||||||
|
if(!QIcon(in.value().at(1)).isNull()) {
|
||||||
|
ri.icon = QIcon(in.value().at(1));
|
||||||
|
}else {
|
||||||
|
ri.icon = QIcon(":/res/icons/desktop.png");
|
||||||
|
}
|
||||||
|
ri.name = in.key().app_name;
|
||||||
|
SearchPluginIface::DescriptionInfo di;
|
||||||
|
di.key = QString(tr("Application Description:"));
|
||||||
|
di.value = in.value().at(3);
|
||||||
|
ri.description.append(di);
|
||||||
|
ri.actionKey = in.value().at(2);
|
||||||
|
ri.type = 1; //1 means not installed apps.
|
||||||
|
if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) {
|
||||||
|
m_search_result->enqueue(ri);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
#ifndef APPSEARCHPLUGIN_H
|
||||||
|
#define APPSEARCHPLUGIN_H
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include "search-plugin-iface.h"
|
||||||
|
#include "app-match.h"
|
||||||
|
#include "libsearch_global.h"
|
||||||
|
namespace Zeeker {
|
||||||
|
class LIBSEARCH_EXPORT AppSearchPlugin : public QObject, public SearchPluginIface
|
||||||
|
{
|
||||||
|
friend class AppSearch;
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
AppSearchPlugin(QObject *parent = nullptr);
|
||||||
|
PluginType pluginType() {return PluginType::SearchPlugin;}
|
||||||
|
const QString name();
|
||||||
|
const QString description();
|
||||||
|
const QIcon icon() {return QIcon::fromTheme("appsearch");}
|
||||||
|
void setEnable(bool enable) {m_enable = enable;}
|
||||||
|
bool isEnable() {return m_enable;}
|
||||||
|
QString getPluginName();
|
||||||
|
|
||||||
|
void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
|
||||||
|
QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
|
||||||
|
void openAction(int actionkey, QString key, int type);
|
||||||
|
bool isPreviewEnable(QString key, int type);
|
||||||
|
QWidget *previewPage(QString key, int type, QWidget *parent);
|
||||||
|
private:
|
||||||
|
bool launch(const QString &path);
|
||||||
|
bool addPanelShortcut(const QString &path);
|
||||||
|
bool addDesktopShortcut(const QString &path);
|
||||||
|
bool installAppAction(const QString &name);
|
||||||
|
bool m_enable = true;
|
||||||
|
QList<SearchPluginIface::Actioninfo> m_actionInfo_installed;
|
||||||
|
QList<SearchPluginIface::Actioninfo> m_actionInfo_not_installed;
|
||||||
|
QThreadPool m_pool;
|
||||||
|
static size_t uniqueSymbol;
|
||||||
|
static QMutex m_mutex;
|
||||||
|
};
|
||||||
|
|
||||||
|
class AppSearch : public QObject, public QRunnable {
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
AppSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, const QString& keyword, size_t uniqueSymbol);
|
||||||
|
~AppSearch();
|
||||||
|
protected:
|
||||||
|
void run() override;
|
||||||
|
private:
|
||||||
|
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
|
||||||
|
size_t m_uniqueSymbol;
|
||||||
|
QString m_keyword;
|
||||||
|
QMap<NameString, QStringList> m_installed_apps;
|
||||||
|
QMap<NameString, QStringList> m_not_installed_apps;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // APPSEARCHPLUGIN_H
|
|
@ -2,6 +2,8 @@ INCLUDEPATH += $$PWD
|
||||||
|
|
||||||
HEADERS += \
|
HEADERS += \
|
||||||
$$PWD/app-match.h \
|
$$PWD/app-match.h \
|
||||||
|
$$PWD/app-search-plugin.h
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
$$PWD/app-match.cpp \
|
$$PWD/app-match.cpp \
|
||||||
|
$$PWD/app-search-plugin.cpp
|
||||||
|
|
|
@ -1,7 +1,25 @@
|
||||||
#ifndef COMMON_H
|
#ifndef COMMON_H
|
||||||
#define COMMON_H
|
#define COMMON_H
|
||||||
|
#include <QMap>
|
||||||
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
|
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
|
||||||
|
#define FILE_SEARCH_VALUE "0"
|
||||||
|
#define DIR_SEARCH_VALUE "1"
|
||||||
|
|
||||||
|
#define HOME_PATH QDir::homePath()
|
||||||
|
static const QMap<QString, bool> targetFileTypeMap = {
|
||||||
|
std::map<QString, bool>::value_type("doc", true),
|
||||||
|
std::map<QString, bool>::value_type("docx", true),
|
||||||
|
std::map<QString, bool>::value_type("ppt", true),
|
||||||
|
std::map<QString, bool>::value_type("pptx", true),
|
||||||
|
std::map<QString, bool>::value_type("xls", true),
|
||||||
|
std::map<QString, bool>::value_type("xlsx", true),
|
||||||
|
std::map<QString, bool>::value_type("txt", true),
|
||||||
|
std::map<QString, bool>::value_type("dot", true),
|
||||||
|
std::map<QString, bool>::value_type("wps", true),
|
||||||
|
std::map<QString, bool>::value_type("pps", true),
|
||||||
|
std::map<QString, bool>::value_type("dps", true),
|
||||||
|
std::map<QString, bool>::value_type("et", true),
|
||||||
|
std::map<QString, bool>::value_type("pdf", true)
|
||||||
|
};
|
||||||
//TODO Put things that needed to be put here here.
|
//TODO Put things that needed to be put here here.
|
||||||
#endif // COMMON_H
|
#endif // COMMON_H
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
|
#include <QXmlStreamReader>
|
||||||
|
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
size_t FileUtils::_max_index_count = 0;
|
size_t FileUtils::_max_index_count = 0;
|
||||||
|
@ -177,6 +178,22 @@ QString FileUtils::getSettingName(const QString& setting) {
|
||||||
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool FileUtils::isOrUnder(QString pathA, QString pathB)
|
||||||
|
{
|
||||||
|
if(pathA[0] != "/")
|
||||||
|
pathA.prepend("/");
|
||||||
|
if(pathB[0] != "/")
|
||||||
|
pathB.prepend("/");
|
||||||
|
|
||||||
|
if(pathA.length() < pathB.length())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if(pathA == pathB || pathA.startsWith(pathB + "/"))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void FileUtils::loadHanziTable(const QString &fileName) {
|
void FileUtils::loadHanziTable(const QString &fileName) {
|
||||||
QFile file(fileName);
|
QFile file(fileName);
|
||||||
|
@ -482,12 +499,30 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
||||||
if(!file.open(QuaZip::mdUnzip))
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
|
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
QuaZipFile fileR(&file);
|
QuaZipFile fileR(&file);
|
||||||
|
|
||||||
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
||||||
|
|
||||||
|
QXmlStreamReader reader(&fileR);
|
||||||
|
|
||||||
|
while (!reader.atEnd()){
|
||||||
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||||
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||||
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fileR.close();
|
||||||
|
file.close();
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* //原加载DOM文档方式;
|
||||||
QDomDocument doc;
|
QDomDocument doc;
|
||||||
doc.setContent(fileR.readAll());
|
doc.setContent(fileR.readAll());
|
||||||
fileR.close();
|
fileR.close();
|
||||||
|
@ -499,7 +534,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
||||||
QDomElement wr = wp.firstChildElement("w:r");
|
QDomElement wr = wp.firstChildElement("w:r");
|
||||||
while(!wr.isNull()) {
|
while(!wr.isNull()) {
|
||||||
QDomElement wt = wr.firstChildElement("w:t");
|
QDomElement wt = wr.firstChildElement("w:t");
|
||||||
textcontent.append(wt.text().replace("\n", ""));
|
textcontent.append(wt.text().replace("\n", "")).replace("\r", " ");
|
||||||
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
|
||||||
file.close();
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
@ -512,6 +547,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
||||||
}
|
}
|
||||||
file.close();
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
||||||
|
@ -527,8 +563,35 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
||||||
if(i.startsWith(prefix))
|
if(i.startsWith(prefix))
|
||||||
fileList << i;
|
fileList << i;
|
||||||
}
|
}
|
||||||
if(fileList.isEmpty())
|
if(fileList.isEmpty()) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i = 0; i < fileList.size(); ++i){
|
||||||
|
QString name = prefix + QString::number(i + 1) + ".xml";
|
||||||
|
if(!file.setCurrentFile(name)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
QuaZipFile fileR(&file);
|
||||||
|
fileR.open(QIODevice::ReadOnly);
|
||||||
|
|
||||||
|
QXmlStreamReader reader(&fileR);
|
||||||
|
|
||||||
|
while (!reader.atEnd()){
|
||||||
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||||
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||||
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fileR.close();
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
QDomElement sptree;
|
QDomElement sptree;
|
||||||
QDomElement sp;
|
QDomElement sp;
|
||||||
QDomElement txbody;
|
QDomElement txbody;
|
||||||
|
@ -596,6 +659,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
||||||
}
|
}
|
||||||
file.close();
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||||
|
@ -606,12 +670,30 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||||
if(!file.open(QuaZip::mdUnzip))
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
|
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
QuaZipFile fileR(&file);
|
QuaZipFile fileR(&file);
|
||||||
|
|
||||||
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
fileR.open(QIODevice::ReadOnly);
|
||||||
|
|
||||||
|
QXmlStreamReader reader(&fileR);
|
||||||
|
|
||||||
|
while (!reader.atEnd()){
|
||||||
|
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||||
|
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||||
|
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fileR.close();
|
||||||
|
file.close();
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
QDomDocument doc;
|
QDomDocument doc;
|
||||||
doc.setContent(fileR.readAll());
|
doc.setContent(fileR.readAll());
|
||||||
fileR.close();
|
fileR.close();
|
||||||
|
@ -641,16 +723,19 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||||
}
|
}
|
||||||
file.close();
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
||||||
Poppler::Document *doc = Poppler::Document::load(path);
|
Poppler::Document *doc = Poppler::Document::load(path);
|
||||||
if(doc->isLocked())
|
if(doc->isLocked()) {
|
||||||
|
delete doc;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
const QRectF qf;
|
const QRectF qf;
|
||||||
int pageNum = doc->numPages();
|
int pageNum = doc->numPages();
|
||||||
for(int i = 0; i < pageNum; ++i) {
|
for(int i = 0; i < pageNum; ++i) {
|
||||||
textcontent.append(doc->page(i)->text(qf).replace("\n", ""));
|
textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " "));
|
||||||
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
|
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -679,7 +764,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) {
|
||||||
stream.setCodec(codec);
|
stream.setCodec(codec);
|
||||||
uchardet_delete(chardet);
|
uchardet_delete(chardet);
|
||||||
|
|
||||||
textcontent = stream.readAll().replace("\n", "");
|
textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
|
||||||
|
|
||||||
file.close();
|
file.close();
|
||||||
encodedString.clear();
|
encodedString.clear();
|
||||||
|
@ -688,3 +773,18 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) {
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool FileUtils::openFile(QString &path, bool openInDir)
|
||||||
|
{
|
||||||
|
if(openInDir) {
|
||||||
|
return QDesktopServices::openUrl(QUrl::fromLocalFile(path.left(path.lastIndexOf("/"))));
|
||||||
|
} else {
|
||||||
|
return QDesktopServices::openUrl(QUrl::fromLocalFile(path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FileUtils::copyPath(QString &path)
|
||||||
|
{
|
||||||
|
QApplication::clipboard()->setText(path);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
|
@ -35,6 +35,9 @@
|
||||||
#include <QMap>
|
#include <QMap>
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
#include <QQueue>
|
#include <QQueue>
|
||||||
|
#include <QDesktopServices>
|
||||||
|
#include <QApplication>
|
||||||
|
#include <QClipboard>
|
||||||
|
|
||||||
#include <quazip/quazipfile.h>
|
#include <quazip/quazipfile.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -67,6 +70,8 @@ public:
|
||||||
static QString getFileName(const QString &);
|
static QString getFileName(const QString &);
|
||||||
static QString getAppName(const QString &);
|
static QString getAppName(const QString &);
|
||||||
static QString getSettingName(const QString &);
|
static QString getSettingName(const QString &);
|
||||||
|
//A is or under B
|
||||||
|
static bool isOrUnder(QString pathA, QString pathB);
|
||||||
|
|
||||||
//chinese character to pinyin
|
//chinese character to pinyin
|
||||||
static QMap<QString, QStringList> map_chinese2pinyin;
|
static QMap<QString, QStringList> map_chinese2pinyin;
|
||||||
|
@ -81,6 +86,9 @@ public:
|
||||||
static void getXlsxTextContent(QString &path, QString &textcontent);
|
static void getXlsxTextContent(QString &path, QString &textcontent);
|
||||||
static void getPdfTextContent(QString &path, QString &textcontent);
|
static void getPdfTextContent(QString &path, QString &textcontent);
|
||||||
static void getTxtContent(QString &path, QString &textcontent);
|
static void getTxtContent(QString &path, QString &textcontent);
|
||||||
|
|
||||||
|
static bool openFile(QString &path, bool openInDir = false);
|
||||||
|
static bool copyPath(QString &path);
|
||||||
static size_t _max_index_count;
|
static size_t _max_index_count;
|
||||||
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
static size_t _current_index_count; //this one has been Abandoned,do not use it.
|
||||||
static unsigned short _index_status;
|
static unsigned short _index_status;
|
||||||
|
|
|
@ -83,13 +83,13 @@ GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent) {
|
||||||
connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) {
|
connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) {
|
||||||
if(key == STYLE_NAME_KEY) {
|
if(key == STYLE_NAME_KEY) {
|
||||||
//当前主题改变时也发出paletteChanged信号,通知主界面刷新
|
//当前主题改变时也发出paletteChanged信号,通知主界面刷新
|
||||||
qApp->paletteChanged(qApp->palette());
|
|
||||||
m_cache.remove(STYLE_NAME_KEY);
|
m_cache.remove(STYLE_NAME_KEY);
|
||||||
m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString());
|
m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString());
|
||||||
} else if(key == FONT_SIZE_KEY) {
|
|
||||||
qApp->paletteChanged(qApp->palette());
|
qApp->paletteChanged(qApp->palette());
|
||||||
|
} else if(key == FONT_SIZE_KEY) {
|
||||||
m_cache.remove(FONT_SIZE_KEY);
|
m_cache.remove(FONT_SIZE_KEY);
|
||||||
m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble());
|
m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble());
|
||||||
|
qApp->paletteChanged(qApp->palette());
|
||||||
} else if (key == ICON_THEME_KEY) {
|
} else if (key == ICON_THEME_KEY) {
|
||||||
qApp->paletteChanged(qApp->palette());
|
qApp->paletteChanged(qApp->palette());
|
||||||
}
|
}
|
||||||
|
@ -146,24 +146,28 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
|
||||||
m_block_dirs_settings->remove(path);
|
m_block_dirs_settings->remove(path);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if(!path.startsWith("/home")) {
|
// if(!path.startsWith("/home")) {
|
||||||
// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
|
// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
|
||||||
returnCode = PATH_NOT_IN_HOME;
|
// returnCode = PATH_NOT_IN_HOME;
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
|
|
||||||
//why QSetting's key can't start with "/"??
|
//why QSetting's key can't start with "/"??
|
||||||
QString pathKey = path.right(path.length() - 1);
|
QString pathKey = path.right(path.length() - 1);
|
||||||
|
|
||||||
|
if (pathKey.endsWith(QLatin1Char('/'))) {
|
||||||
|
pathKey = pathKey.mid(0, pathKey.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
QStringList blockDirs = m_block_dirs_settings->allKeys();
|
QStringList blockDirs = m_block_dirs_settings->allKeys();
|
||||||
for(QString i : blockDirs) {
|
for(QString i : blockDirs) {
|
||||||
if(pathKey.startsWith(i)) {
|
if(FileUtils::isOrUnder(pathKey, i)) {
|
||||||
// returnCode = QString(tr("My parent folder has been blocked!"));
|
// returnCode = QString(tr("My parent folder has been blocked!"));
|
||||||
returnCode = PATH_PARENT_BLOCKED;
|
returnCode = PATH_PARENT_BLOCKED;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(i.startsWith(pathKey))
|
if(FileUtils::isOrUnder(i, pathKey))
|
||||||
m_block_dirs_settings->remove(i);
|
m_block_dirs_settings->remove(i);
|
||||||
}
|
}
|
||||||
m_block_dirs_settings->setValue(pathKey, "0");
|
m_block_dirs_settings->setValue(pathKey, "0");
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#include <QDBusInterface>
|
#include <QDBusInterface>
|
||||||
#include <QApplication>
|
#include <QApplication>
|
||||||
#include "libsearch_global.h"
|
#include "libsearch_global.h"
|
||||||
|
#include "file-utils.h"
|
||||||
|
|
||||||
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
|
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
|
||||||
#define TRANSPARENCY_KEY "transparency"
|
#define TRANSPARENCY_KEY "transparency"
|
||||||
|
|
|
@ -34,8 +34,8 @@ ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list) {
|
||||||
|
|
||||||
void ConstructDocumentForPath::run() {
|
void ConstructDocumentForPath::run() {
|
||||||
// qDebug()<<"ConstructDocumentForPath";
|
// qDebug()<<"ConstructDocumentForPath";
|
||||||
if(!Zeeker::_doc_list_path)
|
// if(!Zeeker::_doc_list_path)
|
||||||
Zeeker::_doc_list_path = new QList<Document>;
|
// Zeeker::_doc_list_path = new QVector<Document>;
|
||||||
// qDebug()<<_doc_list_path->size();
|
// qDebug()<<_doc_list_path->size();
|
||||||
QString index_text = m_list.at(0).toLower();
|
QString index_text = m_list.at(0).toLower();
|
||||||
QString sourcePath = m_list.at(1);
|
QString sourcePath = m_list.at(1);
|
||||||
|
@ -87,9 +87,9 @@ void ConstructDocumentForPath::run() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
|
||||||
Zeeker::_mutex_doc_list_path.lock();
|
IndexGenerator::_mutex_doc_list_path.lock();
|
||||||
Zeeker::_doc_list_path->append(doc);
|
IndexGenerator::_doc_list_path.append(doc);
|
||||||
Zeeker::_mutex_doc_list_path.unlock();
|
IndexGenerator::_mutex_doc_list_path.unlock();
|
||||||
// qDebug()<<"ConstructDocumentForPath finish";
|
// qDebug()<<"ConstructDocumentForPath finish";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -102,32 +102,39 @@ ConstructDocumentForContent::ConstructDocumentForContent(QString path) {
|
||||||
void ConstructDocumentForContent::run() {
|
void ConstructDocumentForContent::run() {
|
||||||
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
|
||||||
// 构造文本索引的document
|
// 构造文本索引的document
|
||||||
if(!Zeeker::_doc_list_content)
|
// if(!Zeeker::_doc_list_content)
|
||||||
Zeeker::_doc_list_content = new QList<Document>;
|
// Zeeker::_doc_list_content = new QVector<Document>;
|
||||||
QString content;
|
QString content;
|
||||||
FileReader::getTextContent(m_path, content);
|
FileReader::getTextContent(m_path, content);
|
||||||
if(content.isEmpty())
|
if(content.isEmpty())
|
||||||
return;
|
return;
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
//QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
//QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||||
|
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString());
|
|
||||||
|
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
//doc.setUniqueTerm(uniqueterm);
|
||||||
doc.addTerm(upTerm);
|
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
|
||||||
|
//doc.addTerm(upTerm);
|
||||||
|
doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||||
doc.addValue(m_path);
|
doc.addValue(m_path);
|
||||||
for(int i = 0; i < term.size(); ++i) {
|
|
||||||
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
|
|
||||||
|
|
||||||
|
//'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
|
||||||
|
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
||||||
|
|
||||||
|
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||||
|
std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
|
||||||
|
|
||||||
|
for(size_t i = 0; i < term.size(); ++i) {
|
||||||
|
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
|
||||||
}
|
}
|
||||||
|
|
||||||
Zeeker::_mutex_doc_list_content.lock();
|
IndexGenerator::_mutex_doc_list_content.lock();
|
||||||
Zeeker::_doc_list_content->append(doc);
|
IndexGenerator::_doc_list_content.append(doc);
|
||||||
Zeeker::_mutex_doc_list_content.unlock();
|
IndexGenerator::_mutex_doc_list_content.unlock();
|
||||||
content.clear();
|
content.clear();
|
||||||
content.squeeze();
|
content.squeeze();
|
||||||
|
|
||||||
term.clear();
|
term.clear();
|
||||||
|
term.shrink_to_fit();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
#include "data-queue.h"
|
||||||
|
|
||||||
|
// Out-of-line default constructor; DataQueue has nothing to initialise yet.
DataQueue::DataQueue() = default;
|
|
@ -0,0 +1,11 @@
|
||||||
|
#ifndef DATAQUEUE_H
|
||||||
|
#define DATAQUEUE_H
|
||||||
|
|
||||||
|
|
||||||
|
// Placeholder queue type: at this point it only declares a default
// constructor, which is defined in data-queue.cpp.
class DataQueue {
public:
    DataQueue();
};
|
||||||
|
|
||||||
|
#endif // DATAQUEUE_H
|
|
@ -37,6 +37,17 @@ void Document::addPosting(std::string term, QVector<size_t> offset, int weight)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Document::addPosting(std::string &term, std::vector<size_t> &offset, int weight) {
|
||||||
|
if(term == "")
|
||||||
|
return;
|
||||||
|
if(term.length() > 240)
|
||||||
|
term = QString::fromStdString(term).left(30).toStdString();
|
||||||
|
|
||||||
|
for(size_t i : offset) {
|
||||||
|
m_document.add_posting(term, i, weight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Document::addPosting(std::string term, unsigned int offset, int weight) {
|
void Document::addPosting(std::string term, unsigned int offset, int weight) {
|
||||||
if(term == "")
|
if(term == "")
|
||||||
return;
|
return;
|
||||||
|
@ -52,6 +63,12 @@ void Document::addTerm(QString term) {
|
||||||
m_document.add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// std::string overload of addTerm: adds `term` to the wrapped Xapian document
// without positional information. Empty terms are silently ignored.
void Document::addTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
    }
}
|
||||||
|
|
||||||
void Document::addValue(QString value) {
|
void Document::addValue(QString value) {
|
||||||
m_document.add_value(1, value.toStdString());
|
m_document.add_value(1, value.toStdString());
|
||||||
}
|
}
|
||||||
|
@ -62,12 +79,20 @@ void Document::setUniqueTerm(QString term) {
|
||||||
m_document.add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
|
|
||||||
// m_unique_term = new QString(term);
|
// m_unique_term = new QString(term);
|
||||||
m_unique_term = std::move(term);
|
m_unique_term = std::move(term.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// std::string overload of setUniqueTerm: adds `term` to the wrapped Xapian
// document and remembers it as the value returned by getUniqueTerm().
// An empty term leaves the document and the stored unique term untouched.
void Document::setUniqueTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
        m_unique_term = term;
    }
}
|
||||||
|
|
||||||
std::string Document::getUniqueTerm() {
|
std::string Document::getUniqueTerm() {
|
||||||
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
||||||
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
|
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
|
||||||
return m_unique_term.toStdString();
|
return m_unique_term;//.toStdString();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::setIndexText(QStringList indexText) {
|
void Document::setIndexText(QStringList indexText) {
|
||||||
|
|
|
@ -41,10 +41,13 @@ public:
|
||||||
}
|
}
|
||||||
void setData(QString &data);
|
void setData(QString &data);
|
||||||
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
||||||
|
void addPosting(std::string &term, std::vector<size_t> &offset, int weight = 1);
|
||||||
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
||||||
void addTerm(QString term);
|
void addTerm(QString term);
|
||||||
|
void addTerm(std::string term);
|
||||||
void addValue(QString value);
|
void addValue(QString value);
|
||||||
void setUniqueTerm(QString term);
|
void setUniqueTerm(QString term);
|
||||||
|
void setUniqueTerm(std::string term);
|
||||||
std::string getUniqueTerm();
|
std::string getUniqueTerm();
|
||||||
void setIndexText(QStringList indexText);
|
void setIndexText(QStringList indexText);
|
||||||
QStringList getIndexText();
|
QStringList getIndexText();
|
||||||
|
@ -52,7 +55,8 @@ public:
|
||||||
private:
|
private:
|
||||||
Xapian::Document m_document;
|
Xapian::Document m_document;
|
||||||
QStringList m_index_text;
|
QStringList m_index_text;
|
||||||
QString m_unique_term;
|
//QString m_unique_term;
|
||||||
|
std::string m_unique_term;
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,232 @@
|
||||||
|
#include "file-search-plugin.h"
|
||||||
|
#include "search-manager.h"
|
||||||
|
#include <QWidget>
|
||||||
|
#include <QLabel>
|
||||||
|
#include <QHBoxLayout>
|
||||||
|
using namespace Zeeker;
|
||||||
|
|
||||||
|
// Registers the three result actions (keys 0, 1, 2 - dispatched in
// openAction()) and configures the plugin's private thread pool:
// at most 2 threads, idle threads expire after 1000 ms.
FileSearchPlugin::FileSearchPlugin(QObject *parent) : QObject(parent)
{
    m_actionInfo << SearchPluginIface::Actioninfo{0, tr("Open")}
                 << SearchPluginIface::Actioninfo{1, tr("Open path")}
                 << SearchPluginIface::Actioninfo{2, tr("Copy Path")};

    m_pool.setMaxThreadCount(2);
    m_pool.setExpiryTimeout(1000);
}
|
||||||
|
|
||||||
|
// Translated display name of the file search plugin.
const QString FileSearchPlugin::name()
{
    QString displayName = tr("File Search");
    return displayName;
}
|
||||||
|
|
||||||
|
// Translated one-line description of the file search plugin.
const QString FileSearchPlugin::description()
{
    QString text = tr("File search.");
    return text;
}
|
||||||
|
|
||||||
|
// Translated plugin name; uses the same source string as name().
QString FileSearchPlugin::getPluginName()
{
    QString pluginName = tr("File Search");
    return pluginName;
}
|
||||||
|
|
||||||
|
// Starts a file-name search for `keyword` on the plugin's thread pool.
//
// keyword       text to search for
// searchResult  queue handed to the search task, which fills it with results
//
// Every call bumps SearchManager::uniqueSymbol1 and passes the new value to
// the task it starts. Depending on FileUtils::searchMethod either a
// DirectSearch or an index-backed FileSearch (slot 1, begin 0, num 5) is
// queued; any other search method starts nothing.
void Zeeker::FileSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    // Take the snapshot while the mutex is still held: reading the counter
    // after unlock() could observe a value bumped by a concurrent call and
    // tag this task with somebody else's request id.
    SearchManager::m_mutex1.lock();
    const auto requestId = ++SearchManager::uniqueSymbol1;
    SearchManager::m_mutex1.unlock();

    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        DirectSearch *directSearch = new DirectSearch(keyword, searchResult, FILE_SEARCH_VALUE, requestId);
        m_pool.start(directSearch);
    } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        FileSearch *filesearch = new FileSearch(searchResult, requestId, keyword, FILE_SEARCH_VALUE, 1, 0, 5);
        m_pool.start(filesearch);
    }
}
|
||||||
|
|
||||||
|
// Returns the action list built in the constructor. `type` is not consulted:
// every result of this plugin offers the same actions.
QList<SearchPluginIface::Actioninfo> FileSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type);
    return m_actionInfo;
}
|
||||||
|
|
||||||
|
void FileSearchPlugin::openAction(int actionkey, QString key, int type)
|
||||||
|
{
|
||||||
|
//TODO add some return message here.
|
||||||
|
switch (actionkey) {
|
||||||
|
case 0:
|
||||||
|
FileUtils::openFile(key);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
FileUtils::openFile(key, true);
|
||||||
|
case 2:
|
||||||
|
FileUtils::copyPath(key);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// File results always offer a preview; neither argument is inspected.
bool FileSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key);
    Q_UNUSED(type);
    return true;
}
|
||||||
|
|
||||||
|
// Builds the preview widget for result `key`: a fixed 120x120 page holding a
// single centred label that shows the file's icon.
//
// key     local file path; converted to a URL string for the icon lookup
// type    unused
// parent  Qt parent of the returned page
// Returns the newly created page (the layout and label are its children).
QWidget *FileSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(type);
    const int iconSize = 120;

    QWidget *page = new QWidget(parent);
    QHBoxLayout *layout = new QHBoxLayout(page);
    layout->setContentsMargins(0, 0, 0, 0);

    QLabel *iconLabel = new QLabel(page);
    layout->addWidget(iconLabel);
    iconLabel->setFixedHeight(iconSize);
    page->setFixedSize(iconSize, iconSize);
    layout->setAlignment(Qt::AlignCenter);

    const QString fileUri = QUrl::fromLocalFile(key).toString();
    iconLabel->setPixmap(FileUtils::getFileIcon(fileUri).pixmap(iconSize, iconSize));
    return page;
}
|
||||||
|
|
||||||
|
// Registers the three result actions (keys 0, 1, 2 - dispatched in
// openAction()) and configures the plugin's private thread pool:
// at most 2 threads, idle threads expire after 1000 ms.
DirSearchPlugin::DirSearchPlugin(QObject *parent) : QObject(parent)
{
    m_actionInfo << SearchPluginIface::Actioninfo{0, tr("Open")}
                 << SearchPluginIface::Actioninfo{1, tr("Open path")}
                 << SearchPluginIface::Actioninfo{2, tr("Copy Path")};

    m_pool.setMaxThreadCount(2);
    m_pool.setExpiryTimeout(1000);
}
|
||||||
|
|
||||||
|
// Translated display name of the directory search plugin.
const QString DirSearchPlugin::name()
{
    QString displayName = tr("Dir Search");
    return displayName;
}
|
||||||
|
|
||||||
|
// Translated one-line description of the directory search plugin.
const QString DirSearchPlugin::description()
{
    QString text = tr("Dir search.");
    return text;
}
|
||||||
|
|
||||||
|
// Translated plugin name; uses the same source string as name().
QString DirSearchPlugin::getPluginName()
{
    QString pluginName = tr("Dir Search");
    return pluginName;
}
|
||||||
|
|
||||||
|
// Starts a directory-name search for `keyword` on the plugin's thread pool.
//
// keyword       text to search for
// searchResult  queue handed to the search task, which fills it with results
//
// Every call bumps SearchManager::uniqueSymbol2 and passes the new value to
// the task it starts. Depending on FileUtils::searchMethod either a
// DirectSearch or an index-backed FileSearch (slot 1, begin 0, num 5) is
// queued; any other search method starts nothing.
void Zeeker::DirSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    // Take the snapshot while the mutex is still held: reading the counter
    // after unlock() could observe a value bumped by a concurrent call and
    // tag this task with somebody else's request id.
    SearchManager::m_mutex2.lock();
    const auto requestId = ++SearchManager::uniqueSymbol2;
    SearchManager::m_mutex2.unlock();

    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        DirectSearch *directSearch = new DirectSearch(keyword, searchResult, DIR_SEARCH_VALUE, requestId);
        m_pool.start(directSearch);
    } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        FileSearch *filesearch = new FileSearch(searchResult, requestId, keyword, DIR_SEARCH_VALUE, 1, 0, 5);
        m_pool.start(filesearch);
    }
}
|
||||||
|
|
||||||
|
// Returns the action list built in the constructor. `type` is not consulted:
// every result of this plugin offers the same actions.
QList<SearchPluginIface::Actioninfo> DirSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type);
    return m_actionInfo;
}
|
||||||
|
|
||||||
|
void DirSearchPlugin::openAction(int actionkey, QString key, int type)
|
||||||
|
{
|
||||||
|
//TODO add some return message here.
|
||||||
|
switch (actionkey) {
|
||||||
|
case 0:
|
||||||
|
FileUtils::openFile(key);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
FileUtils::openFile(key, true);
|
||||||
|
case 2:
|
||||||
|
FileUtils::copyPath(key);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Directory results never offer a preview; neither argument is inspected.
bool DirSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key);
    Q_UNUSED(type);
    return false;
}
|
||||||
|
|
||||||
|
// No preview widget exists for directory results (isPreviewEnable() returns
// false), so this always returns nullptr and ignores its arguments.
QWidget *DirSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(key);
    Q_UNUSED(type);
    Q_UNUSED(parent);
    return nullptr;
}
|
||||||
|
|
||||||
|
// Registers the three result actions (keys 0, 1, 2 - dispatched in
// openAction()) and configures the plugin's private thread pool:
// at most 2 threads, idle threads expire after 1000 ms.
FileContengSearchPlugin::FileContengSearchPlugin(QObject *parent) : QObject(parent)
{
    m_actionInfo << SearchPluginIface::Actioninfo{0, tr("Open")}
                 << SearchPluginIface::Actioninfo{1, tr("Open path")}
                 << SearchPluginIface::Actioninfo{2, tr("Copy Path")};

    m_pool.setMaxThreadCount(2);
    m_pool.setExpiryTimeout(1000);
}
|
||||||
|
|
||||||
|
// Translated display name of the file content search plugin.
const QString FileContengSearchPlugin::name()
{
    QString displayName = tr("File Content Search");
    return displayName;
}
|
||||||
|
|
||||||
|
// Translated one-line description of the file content search plugin.
const QString FileContengSearchPlugin::description()
{
    QString text = tr("File content search.");
    return text;
}
|
||||||
|
|
||||||
|
// Translated plugin name. Note that the source string here is spelled
// "File content search", which differs in capitalisation from name().
QString FileContengSearchPlugin::getPluginName()
{
    QString pluginName = tr("File content search");
    return pluginName;
}
|
||||||
|
|
||||||
|
// Starts a file-content search for `keyword` on the plugin's thread pool.
//
// keyword       text to search for
// searchResult  queue handed to the search task, which fills it with results
//
// Every call bumps SearchManager::uniqueSymbol3 and passes the new value to
// the task it starts. Content search is index-only: with DIRECTSEARCH nothing
// is started; with INDEXSEARCH a FileContentSearch (begin 0, num 5) is queued.
void Zeeker::FileContengSearchPlugin::KeywordSearch(QString keyword, DataQueue<ResultInfo> *searchResult)
{
    // Take the snapshot while the mutex is still held: reading the counter
    // after unlock() could observe a value bumped by a concurrent call and
    // tag this task with somebody else's request id.
    SearchManager::m_mutex3.lock();
    const auto requestId = ++SearchManager::uniqueSymbol3;
    SearchManager::m_mutex3.unlock();

    if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
        return;
    } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
        FileContentSearch *fileContentSearch = new FileContentSearch(searchResult, requestId, keyword, 0, 5);
        m_pool.start(fileContentSearch);
    }
}
|
||||||
|
|
||||||
|
// Returns the action list built in the constructor. `type` is not consulted:
// every result of this plugin offers the same actions.
QList<SearchPluginIface::Actioninfo> FileContengSearchPlugin::getActioninfo(int type)
{
    Q_UNUSED(type);
    return m_actionInfo;
}
|
||||||
|
|
||||||
|
void FileContengSearchPlugin::openAction(int actionkey, QString key, int type)
|
||||||
|
{
|
||||||
|
//TODO add some return message here.
|
||||||
|
switch (actionkey) {
|
||||||
|
case 0:
|
||||||
|
FileUtils::openFile(key);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
FileUtils::openFile(key, true);
|
||||||
|
case 2:
|
||||||
|
FileUtils::copyPath(key);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Content-search results never offer a preview; neither argument is inspected.
bool FileContengSearchPlugin::isPreviewEnable(QString key, int type)
{
    Q_UNUSED(key);
    Q_UNUSED(type);
    return false;
}
|
||||||
|
|
||||||
|
// No preview widget exists for content-search results (isPreviewEnable()
// returns false), so this always returns nullptr and ignores its arguments.
QWidget *FileContengSearchPlugin::previewPage(QString key, int type, QWidget *parent)
{
    Q_UNUSED(key);
    Q_UNUSED(type);
    Q_UNUSED(parent);
    return nullptr;
}
|
|
@ -0,0 +1,87 @@
|
||||||
|
#ifndef FILESEARCHPLUGIN_H
|
||||||
|
#define FILESEARCHPLUGIN_H
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include <QThreadPool>
|
||||||
|
|
||||||
|
#include "libsearch_global.h"
|
||||||
|
#include "search-plugin-iface.h"
|
||||||
|
#include "common.h"
|
||||||
|
namespace Zeeker {
|
||||||
|
//internal plugin
|
||||||
|
// Built-in search plugin for file names. Search tasks are run on the
// plugin's own thread pool (m_pool); see file-search-plugin.cpp.
class LIBSEARCH_EXPORT FileSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    FileSearchPlugin(QObject *parent = nullptr);

    // Plugin identity and enable state.
    PluginType pluginType() {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    const QIcon icon() {
        return QIcon::fromTheme("folder");
    }
    void setEnable(bool enable) {
        m_enable = enable;
    }
    bool isEnable() {
        return m_enable;
    }
    QString getPluginName();

    // Searching, result actions and preview.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;                              // value behind setEnable()/isEnable()
    QList<SearchPluginIface::Actioninfo> m_actionInfo; // returned by getActioninfo()
    QThreadPool m_pool;                                // pool the search tasks are started on
};
|
||||||
|
|
||||||
|
// Built-in search plugin for directory names. Search tasks are run on the
// plugin's own thread pool (m_pool); see file-search-plugin.cpp.
class LIBSEARCH_EXPORT DirSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    DirSearchPlugin(QObject *parent = nullptr);

    // Plugin identity and enable state.
    PluginType pluginType() {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    const QIcon icon() {
        return QIcon::fromTheme("folder");
    }
    void setEnable(bool enable) {
        m_enable = enable;
    }
    bool isEnable() {
        return m_enable;
    }
    QString getPluginName();

    // Searching, result actions and preview.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;                              // value behind setEnable()/isEnable()
    QList<SearchPluginIface::Actioninfo> m_actionInfo; // returned by getActioninfo()
    QThreadPool m_pool;                                // pool the search tasks are started on
};
|
||||||
|
|
||||||
|
// Built-in search plugin for file contents. Search tasks are run on the
// plugin's own thread pool (m_pool); see file-search-plugin.cpp.
class LIBSEARCH_EXPORT FileContengSearchPlugin : public QObject, public SearchPluginIface
{
    Q_OBJECT
public:
    FileContengSearchPlugin(QObject *parent = nullptr);

    // Plugin identity and enable state.
    PluginType pluginType() {
        return PluginType::SearchPlugin;
    }
    const QString name();
    const QString description();
    const QIcon icon() {
        return QIcon::fromTheme("folder");
    }
    void setEnable(bool enable) {
        m_enable = enable;
    }
    bool isEnable() {
        return m_enable;
    }
    QString getPluginName();

    // Searching, result actions and preview.
    void KeywordSearch(QString keyword,DataQueue<ResultInfo> *searchResult);
    QList<SearchPluginIface::Actioninfo> getActioninfo(int type);
    void openAction(int actionkey, QString key, int type = 0);
    bool isPreviewEnable(QString key, int type);
    QWidget *previewPage(QString key, int type, QWidget *parent = nullptr);

private:
    bool m_enable = true;                              // value behind setEnable()/isEnable()
    QList<SearchPluginIface::Actioninfo> m_actionInfo; // returned by getActioninfo()
    QThreadPool m_pool;                                // pool the search tasks are started on
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif // FILESEARCHPLUGIN_H
|
|
@ -26,6 +26,8 @@
|
||||||
//#define DELETE_QUEUE(a )
|
//#define DELETE_QUEUE(a )
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
FirstIndex::FirstIndex() {
|
FirstIndex::FirstIndex() {
|
||||||
|
m_pool.setMaxThreadCount(2);
|
||||||
|
m_pool.setExpiryTimeout(100);
|
||||||
}
|
}
|
||||||
|
|
||||||
FirstIndex::~FirstIndex() {
|
FirstIndex::~FirstIndex() {
|
||||||
|
@ -46,7 +48,48 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
||||||
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
||||||
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
|
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
|
||||||
this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
||||||
|
if (fileInfo.fileName().split(".").last() == "docx") {
|
||||||
|
QuaZip file(fileInfo.absoluteFilePath());
|
||||||
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
|
return;
|
||||||
|
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
|
||||||
|
return;
|
||||||
|
QuaZipFile fileR(&file);
|
||||||
|
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
|
||||||
|
file.close();
|
||||||
|
} else if (fileInfo.fileName().split(".").last() == "pptx") {
|
||||||
|
QuaZip file(fileInfo.absoluteFilePath());
|
||||||
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
|
return;
|
||||||
|
QString prefix("ppt/slides/slide");
|
||||||
|
qint64 fileSize(0);
|
||||||
|
qint64 fileIndex(0);
|
||||||
|
for(QString i : file.getFileNameList()) {
|
||||||
|
if(i.startsWith(prefix)){
|
||||||
|
QString name = prefix + QString::number(fileIndex + 1) + ".xml";
|
||||||
|
fileIndex++;
|
||||||
|
if(!file.setCurrentFile(name)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
QuaZipFile fileR(&file);
|
||||||
|
fileSize += fileR.usize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
file.close();
|
||||||
|
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
|
||||||
|
} else if (fileInfo.fileName().split(".").last() == "xlsx") {
|
||||||
|
QuaZip file(fileInfo.absoluteFilePath());
|
||||||
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
|
return;
|
||||||
|
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
|
||||||
|
return;
|
||||||
|
QuaZipFile fileR(&file);
|
||||||
|
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
|
||||||
|
file.close();
|
||||||
|
} else {
|
||||||
|
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,19 +97,6 @@ void FirstIndex::run() {
|
||||||
QTime t1 = QTime::currentTime();
|
QTime t1 = QTime::currentTime();
|
||||||
|
|
||||||
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
|
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
|
||||||
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
|
|
||||||
if(!fifoDir.exists())
|
|
||||||
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
|
|
||||||
|
|
||||||
unlink(UKUI_SEARCH_PIPE_PATH);
|
|
||||||
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
|
|
||||||
if(retval == -1) {
|
|
||||||
qCritical() << "creat fifo error!!";
|
|
||||||
syslog(LOG_ERR, "creat fifo error!!\n");
|
|
||||||
assert(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
qDebug() << "create fifo success\n";
|
|
||||||
|
|
||||||
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
|
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
|
||||||
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
|
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
|
||||||
|
@ -90,8 +120,9 @@ void FirstIndex::run() {
|
||||||
|
|
||||||
this->q_index = new QQueue<QVector<QString>>();
|
this->q_index = new QQueue<QVector<QString>>();
|
||||||
//this->q_content_index = new QQueue<QString>();
|
//this->q_content_index = new QQueue<QString>();
|
||||||
NEW_QUEUE(this->q_content_index);
|
//NEW_QUEUE(this->q_content_index);
|
||||||
// this->mlm = new MessageListManager();
|
// this->mlm = new MessageListManager();
|
||||||
|
this->q_content_index = new QQueue<QPair<QString,qint64>>();
|
||||||
|
|
||||||
int fifo_fd;
|
int fifo_fd;
|
||||||
char buffer[2];
|
char buffer[2];
|
||||||
|
@ -110,7 +141,6 @@ void FirstIndex::run() {
|
||||||
|
|
||||||
|
|
||||||
++FileUtils::_index_status;
|
++FileUtils::_index_status;
|
||||||
|
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
pid = fork();
|
pid = fork();
|
||||||
if(pid == 0) {
|
if(pid == 0) {
|
||||||
|
@ -129,6 +159,7 @@ void FirstIndex::run() {
|
||||||
p_indexGenerator = IndexGenerator::getInstance(true, this);
|
p_indexGenerator = IndexGenerator::getInstance(true, this);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//TODO Fix these weird code.
|
||||||
QSemaphore sem(5);
|
QSemaphore sem(5);
|
||||||
QMutex mutex1, mutex2, mutex3;
|
QMutex mutex1, mutex2, mutex3;
|
||||||
mutex1.lock();
|
mutex1.lock();
|
||||||
|
@ -144,40 +175,53 @@ void FirstIndex::run() {
|
||||||
qDebug() << "max_index_count:" << FileUtils::_max_index_count;
|
qDebug() << "max_index_count:" << FileUtils::_max_index_count;
|
||||||
sem.release(5);
|
sem.release(5);
|
||||||
// });
|
// });
|
||||||
QtConcurrent::run([&]() {
|
QtConcurrent::run(&m_pool, [&]() {
|
||||||
sem.acquire(2);
|
sem.acquire(2);
|
||||||
mutex2.unlock();
|
mutex2.unlock();
|
||||||
qDebug() << "index start;";
|
qDebug() << "index start;";
|
||||||
QQueue<QVector<QString>>* tmp = new QQueue<QVector<QString>>();
|
QQueue<QVector<QString>>* tmp1 = new QQueue<QVector<QString>>();
|
||||||
while(!this->q_index->empty()) {
|
while(!this->q_index->empty()) {
|
||||||
for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) {
|
for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) {
|
||||||
tmp->enqueue(this->q_index->dequeue());
|
tmp1->enqueue(this->q_index->dequeue());
|
||||||
}
|
}
|
||||||
this->p_indexGenerator->creatAllIndex(tmp);
|
this->p_indexGenerator->creatAllIndex(tmp1);
|
||||||
tmp->clear();
|
tmp1->clear();
|
||||||
}
|
}
|
||||||
// this->p_indexGenerator->setSynonym();
|
delete tmp1;
|
||||||
delete tmp;
|
|
||||||
qDebug() << "index end;";
|
qDebug() << "index end;";
|
||||||
sem.release(2);
|
sem.release(2);
|
||||||
});
|
});
|
||||||
QtConcurrent::run([&]() {
|
QtConcurrent::run(&m_pool,[&]() {
|
||||||
sem.acquire(2);
|
sem.acquire(2);
|
||||||
mutex3.unlock();
|
mutex3.unlock();
|
||||||
QQueue<QString>* tmp = new QQueue<QString>();
|
QQueue<QString>* tmp2 = new QQueue<QString>();
|
||||||
qDebug() << "q_content_index:" << q_content_index->size();
|
qDebug() << "q_content_index:" << q_content_index->size();
|
||||||
while(!this->q_content_index->empty()) {
|
while(!this->q_content_index->empty()) {
|
||||||
// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
|
// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
|
||||||
for(size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i) {
|
qint64 fileSize = 0;
|
||||||
tmp->enqueue(this->q_content_index->dequeue());
|
//修改一次处理的数据量,从30个文件改为文件总大小为50M以下,50M为暂定值--jxx20210519
|
||||||
|
for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->q_content_index->empty()); ++i) {
|
||||||
|
QPair<QString,qint64> tempPair = this->q_content_index->dequeue();
|
||||||
|
fileSize += tempPair.second;
|
||||||
|
if (fileSize > 52428800 ) {
|
||||||
|
if (tmp2->size() == 0) {
|
||||||
|
tmp2->enqueue(tempPair.first);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
this->q_content_index->enqueue(tempPair);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
tmp2->enqueue(tempPair.first);
|
||||||
}
|
}
|
||||||
this->p_indexGenerator->creatAllIndex(tmp);
|
// qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << "<<<<<<<<<<<<<<<<<<<";
|
||||||
tmp->clear();
|
this->p_indexGenerator->creatAllIndex(tmp2);
|
||||||
|
tmp2->clear();
|
||||||
}
|
}
|
||||||
delete tmp;
|
delete tmp2;
|
||||||
qDebug() << "content index end;";
|
qDebug() << "content index end;";
|
||||||
sem.release(2);
|
sem.release(2);
|
||||||
});
|
});
|
||||||
|
|
||||||
mutex1.lock();
|
mutex1.lock();
|
||||||
mutex2.lock();
|
mutex2.lock();
|
||||||
mutex3.lock();
|
mutex3.lock();
|
||||||
|
|
|
@ -56,13 +56,16 @@ private:
|
||||||
bool bool_dataBaseStatusOK = false;
|
bool bool_dataBaseStatusOK = false;
|
||||||
bool bool_dataBaseExist = false;
|
bool bool_dataBaseExist = false;
|
||||||
IndexGenerator* p_indexGenerator = nullptr;
|
IndexGenerator* p_indexGenerator = nullptr;
|
||||||
|
QThreadPool m_pool;
|
||||||
|
|
||||||
//here should be refact
|
//here should be refact
|
||||||
// MessageListManager* mlm;
|
// MessageListManager* mlm;
|
||||||
|
|
||||||
//test
|
//test
|
||||||
QQueue<QVector<QString>>* q_index;
|
QQueue<QVector<QString>>* q_index;
|
||||||
QQueue<QString>* q_content_index;
|
// QQueue<QString>* q_content_index;
|
||||||
|
//修改QQueue存储数据为QPair<QString,qint64>,增加存储文件大小数据便于处理时统计--jxx20210519
|
||||||
|
QQueue<QPair<QString,qint64>>* q_content_index;
|
||||||
|
|
||||||
const QMap<QString, bool> targetFileTypeMap = {
|
const QMap<QString, bool> targetFileTypeMap = {
|
||||||
std::map<QString, bool>::value_type("doc", true),
|
std::map<QString, bool>::value_type("doc", true),
|
||||||
|
|
|
@ -28,9 +28,8 @@
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "index-generator.h"
|
#include "index-generator.h"
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
#include "construct-document.h"
|
|
||||||
#include <QStandardPaths>
|
#include <QStandardPaths>
|
||||||
|
#include <malloc.h>
|
||||||
|
|
||||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||||
|
@ -39,10 +38,14 @@ using namespace Zeeker;
|
||||||
|
|
||||||
static IndexGenerator *global_instance = nullptr;
|
static IndexGenerator *global_instance = nullptr;
|
||||||
QMutex IndexGenerator::m_mutex;
|
QMutex IndexGenerator::m_mutex;
|
||||||
QList<Document> *Zeeker::_doc_list_path;
|
//QVector<Document> *Zeeker::_doc_list_path;
|
||||||
QMutex Zeeker::_mutex_doc_list_path;
|
//QMutex Zeeker::_mutex_doc_list_path;
|
||||||
QList<Document> *Zeeker::_doc_list_content;
|
//QVector<Document> *Zeeker::_doc_list_content;
|
||||||
QMutex Zeeker::_mutex_doc_list_content;
|
//QMutex Zeeker::_mutex_doc_list_content;
|
||||||
|
QMutex IndexGenerator::_mutex_doc_list_path;
|
||||||
|
QMutex IndexGenerator::_mutex_doc_list_content;
|
||||||
|
QVector<Document> IndexGenerator::_doc_list_path = QVector<Document>();
|
||||||
|
QVector<Document> IndexGenerator::_doc_list_content = QVector<Document>();
|
||||||
|
|
||||||
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) {
|
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) {
|
||||||
QMutexLocker locker(&m_mutex);
|
QMutexLocker locker(&m_mutex);
|
||||||
|
@ -61,45 +64,33 @@ bool IndexGenerator::setIndexdataPath() {
|
||||||
|
|
||||||
//文件名索引
|
//文件名索引
|
||||||
/**
 * Builds the file-name (path) index for a batch of files.
 *
 * HandlePathList() is invoked first; it is expected to populate the static
 * IndexGenerator::_doc_list_path buffer (NOTE(review): its body is not visible
 * here - confirm). Every buffered Document is then inserted into the path
 * database and the batch is committed once.
 *
 * @param messageList queue of per-file records handed to HandlePathList().
 * @return false when there is nothing to index or when Xapian reports an
 *         error (INDEX_DATABASE_STATE is then recorded as "1"); true when
 *         the batch was committed.
 */
bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList) {
    HandlePathList(messageList);
    if(IndexGenerator::_doc_list_path.isEmpty()) {
        return false;
    }

    qDebug() << "begin creatAllIndex";
    bool succeeded = true;
    try {
        // Iterate by reference: insertIntoDatabase() takes Document&, so the
        // per-element copy made by `for(auto i : ...)` was pure overhead.
        for(Document &doc : IndexGenerator::_doc_list_path) {
            insertIntoDatabase(doc);
        }
        m_database_path->commit();
    } catch(const Xapian::Error &e) {
        qWarning() << "creatAllIndex fail!" << QString::fromStdString(e.get_description());
        //need a record
        IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1");
        assert(false);
        // With NDEBUG the assert vanishes; report the failure instead of
        // claiming success (consistent with deleteAllIndex()).
        succeeded = false;
    }
    qDebug() << "finish creatAllIndex";

    // Swapping with an empty vector both empties the buffer and releases its
    // capacity, which is what clear() + squeeze() + swap() did in three steps.
    QVector<Document>().swap(IndexGenerator::_doc_list_path);
    return succeeded;
}
|
||||||
//文件内容索引
|
//文件内容索引
|
||||||
|
@ -107,16 +98,19 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
|
||||||
// FileUtils::_index_status |= 0x2;
|
// FileUtils::_index_status |= 0x2;
|
||||||
HandlePathList(messageList);
|
HandlePathList(messageList);
|
||||||
qDebug() << "begin creatAllIndex for content";
|
qDebug() << "begin creatAllIndex for content";
|
||||||
if(_doc_list_content == NULL) {
|
// if(_doc_list_content == NULL) {
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
|
if(IndexGenerator::_doc_list_content.isEmpty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int size = _doc_list_content->size();
|
int size = IndexGenerator::_doc_list_content.size();
|
||||||
qDebug() << "begin creatAllIndex for content" << size;
|
qDebug() << "begin creatAllIndex for content" << size;
|
||||||
if(!size == 0) {
|
if(!size == 0) {
|
||||||
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0");
|
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0");
|
||||||
try {
|
try {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for(auto i : *_doc_list_content) {
|
for(auto i : IndexGenerator::_doc_list_content) {
|
||||||
insertIntoContentDatabase(i);
|
insertIntoContentDatabase(i);
|
||||||
if(++count > 999) {
|
if(++count > 999) {
|
||||||
count = 0;
|
count = 0;
|
||||||
|
@ -133,9 +127,11 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
|
||||||
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
||||||
// FileUtils::_index_status &= ~0x2;
|
// FileUtils::_index_status &= ~0x2;
|
||||||
qDebug() << "finish creatAllIndex for content";
|
qDebug() << "finish creatAllIndex for content";
|
||||||
_doc_list_content->clear();
|
|
||||||
delete _doc_list_content;
|
IndexGenerator::_doc_list_content.clear();
|
||||||
_doc_list_content = nullptr;
|
IndexGenerator::_doc_list_content.squeeze();
|
||||||
|
QVector<Document>().swap(IndexGenerator::_doc_list_content);
|
||||||
|
malloc_trim(0);
|
||||||
}
|
}
|
||||||
Q_EMIT this->transactionFinished();
|
Q_EMIT this->transactionFinished();
|
||||||
return true;
|
return true;
|
||||||
|
@ -297,7 +293,7 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList) {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//deprecated
|
||||||
Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
|
Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
|
||||||
Document doc;
|
Document doc;
|
||||||
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
|
||||||
|
@ -342,7 +338,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
|
||||||
return doc;
|
return doc;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//deprecated
|
||||||
Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
||||||
// 构造文本索引的document
|
// 构造文本索引的document
|
||||||
QString content;
|
QString content;
|
||||||
|
@ -389,7 +385,7 @@ bool IndexGenerator::isIndexdataExist() {
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//deprecated
|
||||||
QStringList IndexGenerator::IndexSearch(QString indexText) {
|
QStringList IndexGenerator::IndexSearch(QString indexText) {
|
||||||
QStringList searchResult;
|
QStringList searchResult;
|
||||||
try {
|
try {
|
||||||
|
@ -455,96 +451,60 @@ QStringList IndexGenerator::IndexSearch(QString indexText) {
|
||||||
return searchResult;
|
return searchResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
//void IndexGenerator::setSynonym()
|
|
||||||
//{
|
|
||||||
// try
|
|
||||||
// {
|
|
||||||
// m_database_path->add_synonym("a","A");
|
|
||||||
// m_database_path->add_synonym("b","B");
|
|
||||||
// m_database_path->add_synonym("c","C");
|
|
||||||
// m_database_path->add_synonym("d","D");
|
|
||||||
// m_database_path->add_synonym("e","A");
|
|
||||||
// m_database_path->add_synonym("f","F");
|
|
||||||
// m_database_path->add_synonym("g","G");
|
|
||||||
// m_database_path->add_synonym("h","H");
|
|
||||||
// m_database_path->add_synonym("i","I");
|
|
||||||
// m_database_path->add_synonym("j","J");
|
|
||||||
// m_database_path->add_synonym("k","K");
|
|
||||||
// m_database_path->add_synonym("l","L");
|
|
||||||
// m_database_path->add_synonym("m","M");
|
|
||||||
// m_database_path->add_synonym("n","N");
|
|
||||||
// m_database_path->add_synonym("o","O");
|
|
||||||
// m_database_path->add_synonym("p","P");
|
|
||||||
// m_database_path->add_synonym("q","Q");
|
|
||||||
// m_database_path->add_synonym("r","R");
|
|
||||||
// m_database_path->add_synonym("s","S");
|
|
||||||
// m_database_path->add_synonym("t","T");
|
|
||||||
// m_database_path->add_synonym("u","U");
|
|
||||||
// m_database_path->add_synonym("v","V");
|
|
||||||
// m_database_path->add_synonym("w","W");
|
|
||||||
// m_database_path->add_synonym("x","X");
|
|
||||||
// m_database_path->add_synonym("y","Y");
|
|
||||||
// m_database_path->add_synonym("z","Z");
|
|
||||||
|
|
||||||
// m_database_path->add_synonym("A","a");
|
|
||||||
// m_database_path->add_synonym("B","b");
|
|
||||||
// m_database_path->add_synonym("C","c");
|
|
||||||
// m_database_path->add_synonym("D","d");
|
|
||||||
// m_database_path->add_synonym("E","e");
|
|
||||||
// m_database_path->add_synonym("F","f");
|
|
||||||
// m_database_path->add_synonym("G","g");
|
|
||||||
// m_database_path->add_synonym("H","h");
|
|
||||||
// m_database_path->add_synonym("I","i");
|
|
||||||
// m_database_path->add_synonym("J","j");
|
|
||||||
// m_database_path->add_synonym("K","k");
|
|
||||||
// m_database_path->add_synonym("L","a");
|
|
||||||
// m_database_path->add_synonym("M","m");
|
|
||||||
// m_database_path->add_synonym("N","n");
|
|
||||||
// m_database_path->add_synonym("O","o");
|
|
||||||
// m_database_path->add_synonym("P","p");
|
|
||||||
// m_database_path->add_synonym("Q","q");
|
|
||||||
// m_database_path->add_synonym("R","r");
|
|
||||||
// m_database_path->add_synonym("S","s");
|
|
||||||
// m_database_path->add_synonym("T","t");
|
|
||||||
// m_database_path->add_synonym("U","u");
|
|
||||||
// m_database_path->add_synonym("V","v");
|
|
||||||
// m_database_path->add_synonym("W","w");
|
|
||||||
// m_database_path->add_synonym("X","x");
|
|
||||||
// m_database_path->add_synonym("Y","y");
|
|
||||||
// m_database_path->add_synonym("Z","z");
|
|
||||||
// m_database_path->commit();
|
|
||||||
// }
|
|
||||||
// catch(const Xapian::Error &e)
|
|
||||||
// {
|
|
||||||
// qWarning() <<QString::fromStdString(e.get_description());
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
/**
 * Removes the given paths from both the path database and the content
 * database, committing both once after the whole list has been processed.
 *
 * @param pathlist paths whose index entries should be dropped.
 * @return true when the list is empty or all deletions were committed;
 *         false as soon as Xapian raises an error.
 */
bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
    if(pathlist->isEmpty()) {
        return true;
    }

    try {
        const int total = pathlist->size();
        for(int idx = 0; idx < total; ++idx) {
            const QString doc = pathlist->at(idx);
            // The unique term derived from the path identifies the document
            // in both databases.
            const std::string uniqueterm = FileUtils::makeDocUterm(doc);
            qDebug() << "--delete start--";
            m_database_path->delete_document(uniqueterm);
            m_database_content->delete_document(uniqueterm);
            qDebug() << "delete path" << doc;
            qDebug() << "delete md5" << QString::fromStdString(uniqueterm);
            qDebug() << "--delete finish--";
        }
        m_database_path->commit();
        m_database_content->commit();
    } catch(const Xapian::Error &e) {
        qWarning() << QString::fromStdString(e.get_description());
        return false;
    }

    Q_EMIT this->transactionFinished();
    return true;
}
|
||||||
|
|
||||||
|
/**
 * Applies a batch of pending file changes to the indexes.
 *
 * Files flagged for removal are deleted from the indexes; every other file is
 * (re)added to the file-name index and, when its extension is enabled in
 * targetFileTypeMap, to the content index as well.
 *
 * @param pendingFiles batch of changes; a null pointer is treated as an empty
 *                     batch.
 * @return always true (the results of the individual index operations are
 *         not propagated, as before).
 */
bool IndexGenerator::updateIndex(QVector<PendingFile> *pendingFiles)
{
    if(pendingFiles == nullptr) {
        return true;
    }

    // Automatic storage: the previous heap-allocated deleteList was never
    // freed, leaking one QStringList per call.
    QQueue<QVector<QString>> fileIndexInfo;
    QQueue<QString> fileContentIndexInfo;
    QStringList deleteList;

    for(PendingFile file : *pendingFiles) {
        const QString filePath = file.path();
        if(file.shouldRemoveIndex()) {
            deleteList.append(filePath);
            continue;
        }

        const QString fileName = filePath.section("/", -1);
        fileIndexInfo.append(QVector<QString>() << fileName << filePath << QString(file.isDir() ? "1" : "0"));

        // split() never yields an empty list, so the old isEmpty() guard was
        // dead code. value() looks the suffix up without inserting a default
        // entry and yields false for unknown suffixes.
        if(targetFileTypeMap.value(fileName.split(".").last())) {
            fileContentIndexInfo.append(filePath);
        }
    }

    if(!deleteList.isEmpty()) {
        deleteAllIndex(&deleteList);
    }
    if(!fileIndexInfo.isEmpty()) {
        creatAllIndex(&fileIndexInfo);
    }
    if(!fileContentIndexInfo.isEmpty()) {
        creatAllIndex(&fileContentIndexInfo);
    }
    return true;
}
|
||||||
|
|
||||||
|
|
|
@ -29,18 +29,22 @@
|
||||||
#include <QMutex>
|
#include <QMutex>
|
||||||
#include <QQueue>
|
#include <QQueue>
|
||||||
//#include <QMetaObject>
|
//#include <QMetaObject>
|
||||||
|
#include "construct-document.h"
|
||||||
#include "index-status-recorder.h"
|
#include "index-status-recorder.h"
|
||||||
#include "document.h"
|
#include "document.h"
|
||||||
#include "file-reader.h"
|
#include "file-reader.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "pending-file.h"
|
||||||
|
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
extern QList<Document> *_doc_list_path;
|
//extern QVector<Document> *_doc_list_path;
|
||||||
extern QMutex _mutex_doc_list_path;
|
//extern QMutex _mutex_doc_list_path;
|
||||||
extern QList<Document> *_doc_list_content;
|
//extern QVector<Document> *_doc_list_content;
|
||||||
extern QMutex _mutex_doc_list_content;
|
//extern QMutex _mutex_doc_list_content;
|
||||||
|
|
||||||
class IndexGenerator : public QObject {
|
class IndexGenerator : public QObject {
|
||||||
|
friend class ConstructDocumentForPath;
|
||||||
|
friend class ConstructDocumentForContent;
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr);
|
static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr);
|
||||||
|
@ -58,6 +62,7 @@ public Q_SLOTS:
|
||||||
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
|
bool creatAllIndex(QQueue<QVector<QString>> *messageList);
|
||||||
bool creatAllIndex(QQueue<QString> *messageList);
|
bool creatAllIndex(QQueue<QString> *messageList);
|
||||||
bool deleteAllIndex(QStringList *pathlist);
|
bool deleteAllIndex(QStringList *pathlist);
|
||||||
|
bool updateIndex(QVector<PendingFile> *pendingFiles);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr);
|
explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr);
|
||||||
|
@ -72,8 +77,10 @@ private:
|
||||||
void insertIntoDatabase(Document& doc);
|
void insertIntoDatabase(Document& doc);
|
||||||
void insertIntoContentDatabase(Document& doc);
|
void insertIntoContentDatabase(Document& doc);
|
||||||
|
|
||||||
// QList<Document> *m_doc_list_path; //for path index
|
static QVector<Document> _doc_list_path;
|
||||||
// QList<Document> *m_doc_list_content; // for text content index
|
static QMutex _mutex_doc_list_path;
|
||||||
|
static QVector<Document> _doc_list_content;
|
||||||
|
static QMutex _mutex_doc_list_content;
|
||||||
QMap<QString, QStringList> m_index_map;
|
QMap<QString, QStringList> m_index_map;
|
||||||
QString m_index_data_path;
|
QString m_index_data_path;
|
||||||
Xapian::WritableDatabase* m_database_path;
|
Xapian::WritableDatabase* m_database_path;
|
||||||
|
|
|
@ -13,8 +13,10 @@ IndexStatusRecorder *IndexStatusRecorder::getInstance()
|
||||||
|
|
||||||
void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value)
|
void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value)
|
||||||
{
|
{
|
||||||
|
m_mutex.lock();
|
||||||
m_status->setValue(key, value);
|
m_status->setValue(key, value);
|
||||||
m_status->sync();
|
m_status->sync();
|
||||||
|
m_mutex.unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
const QVariant IndexStatusRecorder::getStatus(const QString &key)
|
const QVariant IndexStatusRecorder::getStatus(const QString &key)
|
||||||
|
|
|
@ -4,9 +4,11 @@
|
||||||
#include <QObject>
|
#include <QObject>
|
||||||
#include <QSettings>
|
#include <QSettings>
|
||||||
#include <QDir>
|
#include <QDir>
|
||||||
|
#include <QMutex>
|
||||||
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
|
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
|
||||||
#define INDEX_DATABASE_STATE "index_database_state"
|
#define INDEX_DATABASE_STATE "index_database_state"
|
||||||
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
|
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
|
||||||
|
#define PENDING_FILE_QUEUE_FINISH "pending_file_queue_finish"
|
||||||
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
|
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
//fixme: we need a better way to record index status.
|
//fixme: we need a better way to record index status.
|
||||||
|
@ -21,6 +23,7 @@ public:
|
||||||
private:
|
private:
|
||||||
explicit IndexStatusRecorder(QObject *parent = nullptr);
|
explicit IndexStatusRecorder(QObject *parent = nullptr);
|
||||||
QSettings *m_status;
|
QSettings *m_status;
|
||||||
|
QMutex m_mutex;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,14 @@ HEADERS += \
|
||||||
$$PWD/construct-document.h \
|
$$PWD/construct-document.h \
|
||||||
$$PWD/document.h \
|
$$PWD/document.h \
|
||||||
$$PWD/file-reader.h \
|
$$PWD/file-reader.h \
|
||||||
|
$$PWD/file-search-plugin.h \
|
||||||
$$PWD/first-index.h \
|
$$PWD/first-index.h \
|
||||||
$$PWD/index-generator.h \
|
$$PWD/index-generator.h \
|
||||||
$$PWD/index-status-recorder.h \
|
$$PWD/index-status-recorder.h \
|
||||||
$$PWD/inotify-index.h \
|
$$PWD/inotify-index.h \
|
||||||
|
$$PWD/inotify-watch.h \
|
||||||
|
$$PWD/pending-file-queue.h \
|
||||||
|
$$PWD/pending-file.h \
|
||||||
$$PWD/search-manager.h \
|
$$PWD/search-manager.h \
|
||||||
$$PWD/searchmethodmanager.h \
|
$$PWD/searchmethodmanager.h \
|
||||||
$$PWD/traverse_bfs.h \
|
$$PWD/traverse_bfs.h \
|
||||||
|
@ -17,10 +21,14 @@ SOURCES += \
|
||||||
$$PWD/construct-document.cpp \
|
$$PWD/construct-document.cpp \
|
||||||
$$PWD/document.cpp \
|
$$PWD/document.cpp \
|
||||||
$$PWD/file-reader.cpp \
|
$$PWD/file-reader.cpp \
|
||||||
|
$$PWD/file-search-plugin.cpp \
|
||||||
$$PWD/first-index.cpp \
|
$$PWD/first-index.cpp \
|
||||||
$$PWD/index-generator.cpp \
|
$$PWD/index-generator.cpp \
|
||||||
$$PWD/index-status-recorder.cpp \
|
$$PWD/index-status-recorder.cpp \
|
||||||
$$PWD/inotify-index.cpp \
|
$$PWD/inotify-index.cpp \
|
||||||
|
$$PWD/inotify-watch.cpp \
|
||||||
|
$$PWD/pending-file-queue.cpp \
|
||||||
|
$$PWD/pending-file.cpp \
|
||||||
$$PWD/search-manager.cpp \
|
$$PWD/search-manager.cpp \
|
||||||
$$PWD/searchmethodmanager.cpp \
|
$$PWD/searchmethodmanager.cpp \
|
||||||
$$PWD/traverse_bfs.cpp \
|
$$PWD/traverse_bfs.cpp \
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "inotify-index.h"
|
#include "inotify-index.h"
|
||||||
|
#include <QDataStream>
|
||||||
|
|
||||||
#define CREATE_FILE_NAME_INDEX \
|
#define CREATE_FILE_NAME_INDEX \
|
||||||
indexQueue->enqueue(QVector<QString>() << QString(event->name) << QString(currentPath[event->wd] + '/' + event->name) << QString((event->mask & IN_ISDIR) ? "1" : "0")); \
|
indexQueue->enqueue(QVector<QString>() << QString(event->name) << QString(currentPath[event->wd] + '/' + event->name) << QString((event->mask & IN_ISDIR) ? "1" : "0")); \
|
||||||
|
@ -51,7 +52,7 @@ InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path) {
|
||||||
UkuiSearchQDBus usQDBus;
|
UkuiSearchQDBus usQDBus;
|
||||||
usQDBus.setInotifyMaxUserWatches();
|
usQDBus.setInotifyMaxUserWatches();
|
||||||
qDebug() << "setInotifyMaxUserWatches end";
|
qDebug() << "setInotifyMaxUserWatches end";
|
||||||
|
m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this);
|
||||||
}
|
}
|
||||||
|
|
||||||
InotifyIndex::~InotifyIndex() {
|
InotifyIndex::~InotifyIndex() {
|
||||||
|
@ -341,7 +342,24 @@ void InotifyIndex::run() {
|
||||||
qDebug() << "select timeout!";
|
qDebug() << "select timeout!";
|
||||||
::free(read_timeout);
|
::free(read_timeout);
|
||||||
IndexGenerator::getInstance()->~IndexGenerator();
|
IndexGenerator::getInstance()->~IndexGenerator();
|
||||||
// GlobalSettings::getInstance()->forceSync();
|
QBuffer buffer;
|
||||||
|
QDataStream out(&buffer);
|
||||||
|
if (m_sharedMemory->isAttached()) {
|
||||||
|
m_sharedMemory->detach();
|
||||||
|
}
|
||||||
|
buffer.open(QBuffer::ReadWrite);
|
||||||
|
out << currentPath;
|
||||||
|
int size = buffer.size();
|
||||||
|
if (!m_sharedMemory->create(size)) {
|
||||||
|
qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString();
|
||||||
|
} else {
|
||||||
|
m_sharedMemory->lock();
|
||||||
|
char *to = static_cast<char *>(m_sharedMemory->data());
|
||||||
|
const char *from = buffer.data().constData();
|
||||||
|
memcpy(to, from, qMin(size, m_sharedMemory->size()));
|
||||||
|
m_sharedMemory->unlock();
|
||||||
|
}
|
||||||
|
// GlobalSettings::getInstance()->forceSync();
|
||||||
::_exit(0);
|
::_exit(0);
|
||||||
} else {
|
} else {
|
||||||
memset(buf, 0x00, BUF_LEN);
|
memset(buf, 0x00, BUF_LEN);
|
||||||
|
@ -373,6 +391,20 @@ void InotifyIndex::run() {
|
||||||
} else if(pid > 0) {
|
} else if(pid > 0) {
|
||||||
memset(buf, 0x00, BUF_LEN);
|
memset(buf, 0x00, BUF_LEN);
|
||||||
waitpid(pid, NULL, 0);
|
waitpid(pid, NULL, 0);
|
||||||
|
if (!m_sharedMemory->attach()) {
|
||||||
|
qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString();
|
||||||
|
} else {
|
||||||
|
QBuffer buffer;
|
||||||
|
QDataStream in(&buffer);
|
||||||
|
QMap<int, QString> pathMap;
|
||||||
|
m_sharedMemory->lock();
|
||||||
|
buffer.setData(static_cast<const char *>(m_sharedMemory->constData()), m_sharedMemory->size());
|
||||||
|
buffer.open(QBuffer::ReadWrite);
|
||||||
|
in >> pathMap;
|
||||||
|
m_sharedMemory->unlock();
|
||||||
|
m_sharedMemory->detach();
|
||||||
|
currentPath = pathMap;
|
||||||
|
}
|
||||||
--FileUtils::_index_status;
|
--FileUtils::_index_status;
|
||||||
} else {
|
} else {
|
||||||
assert(false);
|
assert(false);
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
#include <QThread>
|
#include <QThread>
|
||||||
#include <QTimer>
|
#include <QTimer>
|
||||||
|
#include <QSharedMemory>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/inotify.h>
|
#include <sys/inotify.h>
|
||||||
#include "index-generator.h"
|
#include "index-generator.h"
|
||||||
|
@ -77,6 +78,7 @@ private:
|
||||||
std::map<QString, bool>::value_type("et", true),
|
std::map<QString, bool>::value_type("et", true),
|
||||||
std::map<QString, bool>::value_type("pdf", true)
|
std::map<QString, bool>::value_type("pdf", true)
|
||||||
};
|
};
|
||||||
|
QSharedMemory *m_sharedMemory = nullptr;
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,459 @@
|
||||||
|
#include "inotify-watch.h"
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <malloc.h>
|
||||||
|
#include <errno.h>
|
||||||
|
using namespace Zeeker;
|
||||||
|
static InotifyWatch* global_instance_InotifyWatch = nullptr;
|
||||||
|
|
||||||
|
/**
 * Returns the process-wide InotifyWatch, creating it on first use.
 *
 * @param path passed to the constructor only when the instance is created;
 *             ignored on later calls.
 */
Zeeker::InotifyWatch *Zeeker::InotifyWatch::getInstance(const QString &path)
{
    if(global_instance_InotifyWatch == nullptr)
        global_instance_InotifyWatch = new InotifyWatch(path);

    return global_instance_InotifyWatch;
}
|
||||||
|
|
||||||
|
/**
 * Constructs the watcher for the tree rooted at @p path.
 *
 * Asks the UkuiSearchQDBus helper to adjust the inotify max_user_watches
 * setting and creates the QSharedMemory handle keyed
 * "ukui-search-shared-map", parented to this object.
 */
Zeeker::InotifyWatch::InotifyWatch(const QString &path): Traverse_BFS(path)
{
    qDebug() << "setInotifyMaxUserWatches start";
    UkuiSearchQDBus usQDBus;
    usQDBus.setInotifyMaxUserWatches();
    qDebug() << "setInotifyMaxUserWatches end";

    // Owned through the QObject parent/child relationship.
    this->m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this);
}
|
||||||
|
|
||||||
|
/** Releases the socket notifier, if one was ever created. */
InotifyWatch::~InotifyWatch()
{
    // delete on a null pointer is a no-op, so no explicit guard is required.
    delete m_notifier;
    m_notifier = nullptr;
}
|
||||||
|
|
||||||
|
bool InotifyWatch::addWatch(const QString &path)
|
||||||
|
{
|
||||||
|
int ret = inotify_add_watch(m_inotifyFd, path.toStdString().c_str(), (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY));
|
||||||
|
if(ret == -1) {
|
||||||
|
qWarning() << "AddWatch error:" << path;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
currentPath[ret] = path;
|
||||||
|
// qDebug() << "Watch: " << path << "ret: " << ret;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
|
||||||
|
{
|
||||||
|
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
||||||
|
|
||||||
|
if(removeFromDatabase) {
|
||||||
|
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
||||||
|
// qDebug() << i.value();
|
||||||
|
// if(i.value().length() > path.length()) {
|
||||||
|
if(FileUtils::isOrUnder(i.value(), path)) {
|
||||||
|
qDebug() << "remove path: " << i.value();
|
||||||
|
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
||||||
|
PendingFile f(i.value());
|
||||||
|
f.setDeleted();
|
||||||
|
f.setIsDir();
|
||||||
|
PendingFileQueue::getInstance()->enqueue(f);
|
||||||
|
currentPath.erase(i++);
|
||||||
|
} else {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
||||||
|
// qDebug() << i.value();
|
||||||
|
if(i.value().length() > path.length()) {
|
||||||
|
if(FileUtils::isOrUnder(i.value(), path)) {
|
||||||
|
// if(i.value().startsWith(path + "/")) {
|
||||||
|
// qDebug() << "remove path: " << i.value();
|
||||||
|
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
||||||
|
currentPath.erase(i++);
|
||||||
|
} else {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
currentPath.remove(currentPath.key(path));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InotifyWatch::DoSomething(const QFileInfo &info)
|
||||||
|
{
|
||||||
|
qDebug() << info.fileName() << "-------" << info.absoluteFilePath();
|
||||||
|
if(info.isDir() && (!info.isSymLink())) {
|
||||||
|
this->addWatch(info.absoluteFilePath());
|
||||||
|
}
|
||||||
|
PendingFile f(info.absoluteFilePath());
|
||||||
|
if(info.isDir()) {
|
||||||
|
f.setIsDir();
|
||||||
|
}
|
||||||
|
PendingFileQueue::getInstance()->enqueue(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void InotifyWatch::firstTraverse()
|
||||||
|
{
|
||||||
|
QQueue<QString> bfs;
|
||||||
|
bfs.enqueue(this->path);
|
||||||
|
QFileInfoList list;
|
||||||
|
QDir dir;
|
||||||
|
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||||
|
dir.setSorting(QDir::DirsFirst);
|
||||||
|
while(!bfs.empty()) {
|
||||||
|
dir.setPath(bfs.dequeue());
|
||||||
|
list = dir.entryInfoList();
|
||||||
|
for(auto i : list) {
|
||||||
|
if(i.isDir() && (!(i.isSymLink()))) {
|
||||||
|
this->addWatch(i.absoluteFilePath());
|
||||||
|
bfs.enqueue(i.absoluteFilePath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Currently a no-op: the original implementation is kept below, disabled.
 */
void InotifyWatch::stopWatch()
{
    /* Disabled implementation, kept for reference:
     *
     * if(this->isRunning()) {
     *     this->quit();
     *     if(m_notifier)
     *         delete m_notifier;
     *     m_notifier = nullptr;
     *     removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
     * }
     *
     * IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
     */
}
|
||||||
|
|
||||||
|
void InotifyWatch::run()
|
||||||
|
{
|
||||||
|
m_inotifyFd = inotify_init();
|
||||||
|
if (m_inotifyFd > 0) {
|
||||||
|
qDebug()<<"Inotify init success!";
|
||||||
|
} else {
|
||||||
|
qWarning() << "Inotify init fail! Now try add inotify_user_instances.";
|
||||||
|
UkuiSearchQDBus usQDBus;
|
||||||
|
usQDBus.addInotifyUserInstances(128);
|
||||||
|
m_inotifyFd = inotify_init();
|
||||||
|
if (m_inotifyFd > 0) {
|
||||||
|
qDebug()<<"Inotify init success!";
|
||||||
|
} else {
|
||||||
|
printf("errno=%d\n",errno);
|
||||||
|
printf("Mesg:%s\n",strerror(errno));
|
||||||
|
Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||||
|
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||||
|
this->firstTraverse();
|
||||||
|
|
||||||
|
int fifo_fd;
|
||||||
|
char buffer[2];
|
||||||
|
memset(buffer, 0, sizeof(buffer));
|
||||||
|
fifo_fd = open(UKUI_SEARCH_PIPE_PATH, O_RDWR);
|
||||||
|
if(fifo_fd == -1) {
|
||||||
|
qWarning() << "Open fifo error\n";
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
int retval = read(fifo_fd, buffer, sizeof(buffer));
|
||||||
|
if(retval == -1) {
|
||||||
|
qWarning() << "read error\n";
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
qDebug("Read fifo[%s]", buffer);
|
||||||
|
|
||||||
|
qDebug("Read data ok");
|
||||||
|
close(fifo_fd);
|
||||||
|
if(buffer[0] & 0x1) {
|
||||||
|
qDebug("Data confirmed\n");
|
||||||
|
}
|
||||||
|
unlink(UKUI_SEARCH_PIPE_PATH);
|
||||||
|
|
||||||
|
while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
|
||||||
|
fd_set fds;
|
||||||
|
FD_ZERO(&fds);
|
||||||
|
FD_SET(m_inotifyFd, &fds);
|
||||||
|
int rc;
|
||||||
|
rc = select(m_inotifyFd + 1, &fds, NULL, NULL, NULL);
|
||||||
|
if(rc > 0) {
|
||||||
|
int avail;
|
||||||
|
if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) {
|
||||||
|
qWarning() << "Did not receive an entire inotify event.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* buf = (char*)malloc(avail);
|
||||||
|
memset(buf, 0x00, avail);
|
||||||
|
|
||||||
|
const ssize_t len = read(m_inotifyFd, buf, avail);
|
||||||
|
if(len != avail) {
|
||||||
|
qWarning()<<"read event error";
|
||||||
|
// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < len) {
|
||||||
|
const struct inotify_event* event = (struct inotify_event*)&buf[i];
|
||||||
|
if(event->name[0] != '.') {
|
||||||
|
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
|
||||||
|
// qDebug("mask:0x%x,",event->mask);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i += sizeof(struct inotify_event) + event->len;
|
||||||
|
}
|
||||||
|
if(i < len ) {
|
||||||
|
qDebug() << "fork";
|
||||||
|
slotEvent(buf, len);
|
||||||
|
free(buf);
|
||||||
|
}
|
||||||
|
} else if(rc < 0) {
|
||||||
|
// error
|
||||||
|
qWarning() << "select result < 0, error!";
|
||||||
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
qDebug() << "Leave watch loop";
|
||||||
|
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
|
||||||
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
|
||||||
|
removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
|
||||||
|
}
|
||||||
|
close(m_inotifyFd);
|
||||||
|
// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
|
||||||
|
// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
|
||||||
|
// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);
|
||||||
|
// exec();
|
||||||
|
}
|
||||||
|
|
||||||
|
void InotifyWatch::slotEvent(char *buf, ssize_t len)
|
||||||
|
{
|
||||||
|
// eventProcess(socket);
|
||||||
|
++FileUtils::_index_status;
|
||||||
|
if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
|
||||||
|
pid_t pid;
|
||||||
|
pid = fork();
|
||||||
|
if(pid == 0) {
|
||||||
|
prctl(PR_SET_PDEATHSIG, SIGTERM);
|
||||||
|
prctl(PR_SET_NAME, "inotify-index");
|
||||||
|
this->eventProcess(buf, len);
|
||||||
|
fd_set read_fds;
|
||||||
|
int rc;
|
||||||
|
timeval* read_timeout = (timeval*)malloc(sizeof(timeval));
|
||||||
|
read_timeout->tv_sec = 40;
|
||||||
|
read_timeout->tv_usec = 0;
|
||||||
|
for(;;) {
|
||||||
|
FD_ZERO(&read_fds);
|
||||||
|
FD_SET(m_inotifyFd, &read_fds);
|
||||||
|
rc = select(m_inotifyFd + 1, &read_fds, NULL, NULL, read_timeout);
|
||||||
|
if(rc < 0) {
|
||||||
|
// error
|
||||||
|
qWarning() << "fork select result < 0, error!";
|
||||||
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||||
|
assert(false);
|
||||||
|
} else if(rc == 0) {
|
||||||
|
qDebug() << "select timeout!";
|
||||||
|
::free(read_timeout);
|
||||||
|
|
||||||
|
QBuffer buffer;
|
||||||
|
QDataStream out(&buffer);
|
||||||
|
if (m_sharedMemory->isAttached()) {
|
||||||
|
m_sharedMemory->detach();
|
||||||
|
}
|
||||||
|
buffer.open(QBuffer::ReadWrite);
|
||||||
|
out << currentPath;
|
||||||
|
int size = buffer.size();
|
||||||
|
if (!m_sharedMemory->create(size)) {
|
||||||
|
qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString();
|
||||||
|
} else {
|
||||||
|
m_sharedMemory->lock();
|
||||||
|
char *to = static_cast<char *>(m_sharedMemory->data());
|
||||||
|
const char *from = buffer.data().constData();
|
||||||
|
memcpy(to, from, qMin(size, m_sharedMemory->size()));
|
||||||
|
m_sharedMemory->unlock();
|
||||||
|
}
|
||||||
|
// GlobalSettings::getInstance()->forceSync();
|
||||||
|
PendingFileQueue::getInstance()->forceFinish();
|
||||||
|
PendingFileQueue::getInstance()->~PendingFileQueue();
|
||||||
|
::_exit(0);
|
||||||
|
} else {
|
||||||
|
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
|
||||||
|
this->eventProcess(m_inotifyFd);
|
||||||
|
// qDebug() << "Select remain:" <<read_timeout->tv_sec;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if(pid > 0) {
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
if (!m_sharedMemory->attach()) {
|
||||||
|
qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString();
|
||||||
|
} else {
|
||||||
|
QBuffer buffer;
|
||||||
|
QDataStream in(&buffer);
|
||||||
|
QMap<int, QString> pathMap;
|
||||||
|
m_sharedMemory->lock();
|
||||||
|
buffer.setData(static_cast<const char *>(m_sharedMemory->constData()), m_sharedMemory->size());
|
||||||
|
buffer.open(QBuffer::ReadWrite);
|
||||||
|
in >> pathMap;
|
||||||
|
m_sharedMemory->unlock();
|
||||||
|
m_sharedMemory->detach();
|
||||||
|
currentPath = pathMap;
|
||||||
|
}
|
||||||
|
--FileUtils::_index_status;
|
||||||
|
} else {
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char * InotifyWatch::filter()
|
||||||
|
{
|
||||||
|
int avail;
|
||||||
|
if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) {
|
||||||
|
qWarning() << "Did not receive an entire inotify event.";
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* buffer = (char*)malloc(avail);
|
||||||
|
memset(buffer, 0x00, avail);
|
||||||
|
|
||||||
|
const int len = read(m_inotifyFd, buffer, avail);
|
||||||
|
if(len != avail) {
|
||||||
|
qWarning()<<"read event error";
|
||||||
|
// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < len) {
|
||||||
|
const struct inotify_event* event = (struct inotify_event*)&buffer[i];
|
||||||
|
if(event->name[0] == '.') {
|
||||||
|
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
|
||||||
|
// qDebug("mask:0x%x,",event->mask);
|
||||||
|
i += sizeof(struct inotify_event) + event->len;
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(buffer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void InotifyWatch::eventProcess(int socket)
|
||||||
|
{
|
||||||
|
// qDebug()<< "Enter eventProcess!";
|
||||||
|
int avail;
|
||||||
|
if (ioctl(socket, FIONREAD, &avail) == EINVAL) {
|
||||||
|
qWarning() << "Did not receive an entire inotify event.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char* buffer = (char*)malloc(avail);
|
||||||
|
memset(buffer, 0x00, avail);
|
||||||
|
|
||||||
|
const ssize_t len = read(socket, buffer, avail);
|
||||||
|
if(len != avail) {
|
||||||
|
qWarning()<<"read event error";
|
||||||
|
// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
|
||||||
|
}
|
||||||
|
int i = 0;
|
||||||
|
while (i < len) {
|
||||||
|
const struct inotify_event* event = (struct inotify_event*)&buffer[i];
|
||||||
|
if(event->name[0] != '.') {
|
||||||
|
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
|
||||||
|
// qDebug("mask:0x%x,",event->mask);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i += sizeof(struct inotify_event) + event->len;
|
||||||
|
}
|
||||||
|
if(i >= len) {
|
||||||
|
qDebug() << "There is nothing to do!";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
eventProcess(buffer, len);
|
||||||
|
free(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
void InotifyWatch::eventProcess(const char *buffer, ssize_t len)
|
||||||
|
{
|
||||||
|
// qDebug()<< "Begin eventProcess! len:" << len;
|
||||||
|
|
||||||
|
char * p = const_cast<char*>(buffer);
|
||||||
|
while (p < buffer + len) {
|
||||||
|
const struct inotify_event* event = reinterpret_cast<inotify_event *>(p);
|
||||||
|
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
|
||||||
|
// qDebug("mask:0x%x,",event->mask);
|
||||||
|
if(event->name[0] != '.') {
|
||||||
|
QString path = currentPath[event->wd] + '/' + event->name;
|
||||||
|
//Create top dir first, traverse it last.
|
||||||
|
if(event->mask & IN_CREATE) {
|
||||||
|
// qDebug() << "IN_CREATE";
|
||||||
|
PendingFile f(path);
|
||||||
|
if(event->mask & IN_ISDIR) {
|
||||||
|
f.setIsDir();
|
||||||
|
}
|
||||||
|
PendingFileQueue::getInstance(this)->enqueue(f);
|
||||||
|
|
||||||
|
if(event->mask & IN_ISDIR) {
|
||||||
|
if(!QFileInfo(path).isSymLink()){
|
||||||
|
addWatch(path);
|
||||||
|
setPath(path);
|
||||||
|
Traverse();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) {
|
||||||
|
qDebug() << "IN_DELETE or IN_MOVED_FROM";
|
||||||
|
if(event->mask & IN_ISDIR) {
|
||||||
|
removeWatch(path);
|
||||||
|
} else {
|
||||||
|
PendingFile f(path);
|
||||||
|
f.setDeleted();
|
||||||
|
PendingFileQueue::getInstance()->enqueue(f);
|
||||||
|
}
|
||||||
|
p += sizeof(struct inotify_event) + event->len;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(event->mask & IN_MODIFY) {
|
||||||
|
// qDebug() << "IN_MODIFY";
|
||||||
|
if(!(event->mask & IN_ISDIR)) {
|
||||||
|
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
|
||||||
|
}
|
||||||
|
goto next;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(event->mask & IN_MOVED_TO) {
|
||||||
|
qDebug() << "IN_MOVED_TO";
|
||||||
|
if(event->mask & IN_ISDIR) {
|
||||||
|
removeWatch(path);
|
||||||
|
|
||||||
|
PendingFile f(path);
|
||||||
|
f.setIsDir();
|
||||||
|
PendingFileQueue::getInstance()->enqueue(f);
|
||||||
|
|
||||||
|
if(!QFileInfo(path).isSymLink()){
|
||||||
|
addWatch(path);
|
||||||
|
setPath(path);
|
||||||
|
Traverse();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//Enqueue a deleted file to merge.
|
||||||
|
PendingFile f(path);
|
||||||
|
f.setDeleted();
|
||||||
|
PendingFileQueue::getInstance()->enqueue(f);
|
||||||
|
//Enqueue a new one.
|
||||||
|
PendingFileQueue::getInstance()->enqueue(PendingFile(path));
|
||||||
|
}
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
next:
|
||||||
|
p += sizeof(struct inotify_event) + event->len;
|
||||||
|
}
|
||||||
|
// qDebug()<< "Finish eventProcess!";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
#ifndef INOTIFYWATCH_H
|
||||||
|
#define INOTIFYWATCH_H
|
||||||
|
|
||||||
|
#include <QThread>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/inotify.h>
|
||||||
|
#include <QSocketNotifier>
|
||||||
|
#include <QDataStream>
|
||||||
|
#include <QSharedMemory>
|
||||||
|
|
||||||
|
#include "traverse_bfs.h"
|
||||||
|
#include "ukui-search-qdbus.h"
|
||||||
|
#include "index-status-recorder.h"
|
||||||
|
#include "file-utils.h"
|
||||||
|
#include "first-index.h"
|
||||||
|
#include "pending-file-queue.h"
|
||||||
|
#include "common.h"
|
||||||
|
namespace Zeeker {
|
||||||
|
class InotifyWatch : public QThread, public Traverse_BFS
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
static InotifyWatch* getInstance(const QString& path);
|
||||||
|
|
||||||
|
bool addWatch(const QString &path);
|
||||||
|
bool removeWatch(const QString &path, bool removeFromDatabase = true);
|
||||||
|
virtual void DoSomething(const QFileInfo &info) final;
|
||||||
|
|
||||||
|
void firstTraverse();
|
||||||
|
void stopWatch();
|
||||||
|
protected:
|
||||||
|
void run() override;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void slotEvent(char *buf, ssize_t len);
|
||||||
|
private:
|
||||||
|
explicit InotifyWatch(const QString& path);
|
||||||
|
~InotifyWatch();
|
||||||
|
char * filter();
|
||||||
|
void eventProcess(int socket);
|
||||||
|
void eventProcess(const char *buffer, ssize_t len);
|
||||||
|
|
||||||
|
int m_inotifyFd;
|
||||||
|
QSocketNotifier* m_notifier = nullptr;
|
||||||
|
QSharedMemory *m_sharedMemory = nullptr;
|
||||||
|
QMap<int, QString> currentPath;
|
||||||
|
QMutex m_mutex;
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // INOTIFYWATCH_H
|
|
@ -0,0 +1,166 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2021, KylinSoft Co., Ltd.
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "pending-file-queue.h"
|
||||||
|
#include "file-utils.h"
|
||||||
|
#include <malloc.h>
|
||||||
|
using namespace Zeeker;
|
||||||
|
static PendingFileQueue *global_instance_pending_file_queue = nullptr;
|
||||||
|
/**
 * Starts the queue thread and creates the two single-shot timers that trigger
 * processCache(): a 10 s cache timer and a 500 ms minimum-process timer.
 *
 * Both timers are moved to this thread object. They are started through the
 * cacheTimerStart()/minProcessTimerStart() signals and stopped through
 * timerStop(); their timeouts call processCache() via a direct connection.
 */
PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent)
{
    // The thread is started before the timers exist, as in the original order.
    start();

    // Builds one unparented single-shot timer and hands it to this thread object.
    const auto createTimer = [this](int intervalMs) {
        QTimer *timer = new QTimer;
        timer->setInterval(intervalMs);
        timer->setSingleShot(true);
        timer->moveToThread(this);
        return timer;
    };
    m_cacheTimer = createTimer(10*1000);
    m_minProcessTimer = createTimer(500);

//    connect(this, &PendingFileQueue::cacheTimerStart, m_cacheTimer, f, Qt::DirectConnection);
//    connect(this, &PendingFileQueue::minProcessTimerStart, m_minProcessTimer, f,Qt::DirectConnection);

    // Start requests.
    connect(this, SIGNAL(cacheTimerStart()), m_cacheTimer, SLOT(start()));
    connect(this, SIGNAL(minProcessTimerStart()), m_minProcessTimer, SLOT(start()));

    // Stop requests reach both timers, cache timer first.
    connect(this, &PendingFileQueue::timerStop, m_cacheTimer, &QTimer::stop);
    connect(this, &PendingFileQueue::timerStop, m_minProcessTimer, &QTimer::stop);

    // Either timeout flushes the cache.
    connect(m_cacheTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection);
    connect(m_minProcessTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection);
}
|
||||||
|
|
||||||
|
/**
 * Returns the process-wide queue, creating it on first use.
 *
 * @param parent QObject parent handed to the constructor; only used by the
 *               call that actually creates the instance.
 */
PendingFileQueue *PendingFileQueue::getInstance(QObject *parent)
{
    if (global_instance_pending_file_queue != nullptr) {
        return global_instance_pending_file_queue;
    }
    global_instance_pending_file_queue = new PendingFileQueue(parent);
    return global_instance_pending_file_queue;
}
|
||||||
|
|
||||||
|
/**
 * Deletes both timers and then runs the IndexGenerator destructor.
 */
PendingFileQueue::~PendingFileQueue()
{
    // delete on a null pointer is a no-op, so no guard is required.
    delete m_cacheTimer;
    m_cacheTimer = nullptr;

    delete m_minProcessTimer;
    m_minProcessTimer = nullptr;

    // NOTE(review): explicit destructor call on the IndexGenerator instance,
    // kept exactly as before.
    IndexGenerator::getInstance()->~IndexGenerator();
}
|
||||||
|
|
||||||
|
void PendingFileQueue::forceFinish()
|
||||||
|
{
|
||||||
|
QThread::msleep(600);
|
||||||
|
Q_EMIT timerStop();
|
||||||
|
this->quit();
|
||||||
|
this->wait();
|
||||||
|
}
|
||||||
|
void PendingFileQueue::enqueue(const PendingFile &file)
|
||||||
|
{
|
||||||
|
// qDebug() << "enqueuq file: " << file.path();
|
||||||
|
m_mutex.lock();
|
||||||
|
m_enqueuetimes++;
|
||||||
|
if(m_cache.isEmpty()) {
|
||||||
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");
|
||||||
|
}
|
||||||
|
// Remove all indexs of files under a dir which is to about be deleted,but keep delete signals.
|
||||||
|
// Because our datebase need to delete those indexs one by one.
|
||||||
|
if(file.shouldRemoveIndex() && file.isDir()) {
|
||||||
|
const auto keepFile = [&file](const PendingFile& pending) {
|
||||||
|
return (!FileUtils::isOrUnder(pending.path(), file.path()) || pending.shouldRemoveIndex());
|
||||||
|
};
|
||||||
|
const auto end = m_cache.end();
|
||||||
|
const auto droppedFilesBegin = std::stable_partition(m_cache.begin(), end, keepFile);
|
||||||
|
m_cache.erase(droppedFilesBegin, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(file.shouldRemoveIndex()) {
|
||||||
|
m_cache.removeOne(file);
|
||||||
|
}
|
||||||
|
int i = m_cache.indexOf(file);
|
||||||
|
if (i == -1) {
|
||||||
|
// qDebug() << "insert file" << file.path() << file.shouldRemoveIndex();
|
||||||
|
m_cache << file;
|
||||||
|
} else {
|
||||||
|
// qDebug() << "merge file" << file.path() << file.shouldRemoveIndex();
|
||||||
|
m_cache[i].merge(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!m_cacheTimer->isActive()) {
|
||||||
|
// qDebug()<<"m_cacheTimer-----start!!";
|
||||||
|
// m_cacheTimer->start();
|
||||||
|
Q_EMIT cacheTimerStart();
|
||||||
|
}
|
||||||
|
Q_EMIT minProcessTimerStart();
|
||||||
|
// m_minProcessTimer->start();
|
||||||
|
// qDebug()<<"m_minProcessTimer-----start!!";
|
||||||
|
m_mutex.unlock();
|
||||||
|
// qDebug() << "Current cache-------------";
|
||||||
|
// for(PendingFile i : m_cache) {
|
||||||
|
// qDebug() << "|" << i.path();
|
||||||
|
// qDebug() << "|" <<i.shouldRemoveIndex();
|
||||||
|
// }
|
||||||
|
// qDebug() << "Current cache-------------";
|
||||||
|
// qDebug()<<"enqueuq file finish!!"<<file.path();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PendingFileQueue::run()
|
||||||
|
{
|
||||||
|
exec();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PendingFileQueue::processCache()
|
||||||
|
{
|
||||||
|
qDebug()<< "Begin processCache!" ;
|
||||||
|
m_mutex.lock();
|
||||||
|
qDebug() << "Events:" << m_enqueuetimes;
|
||||||
|
m_enqueuetimes = 0;
|
||||||
|
m_cache.swap(m_pendingFiles);
|
||||||
|
// m_pendingFiles = m_cache;
|
||||||
|
// m_cache.clear();
|
||||||
|
// m_cache.squeeze();
|
||||||
|
m_mutex.unlock();
|
||||||
|
qDebug() << "Current process-------------";
|
||||||
|
for(PendingFile i : m_pendingFiles) {
|
||||||
|
qDebug() << "|" << i.path();
|
||||||
|
qDebug() << "|" <<i.shouldRemoveIndex();
|
||||||
|
}
|
||||||
|
qDebug() << "Current process-------------";
|
||||||
|
if(m_pendingFiles.isEmpty()) {
|
||||||
|
qDebug()<< "Empty, finish processCache!";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
IndexGenerator::getInstance()->updateIndex(&m_pendingFiles);
|
||||||
|
m_mutex.lock();
|
||||||
|
if(m_cache.isEmpty()) {
|
||||||
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
|
||||||
|
}
|
||||||
|
m_mutex.unlock();
|
||||||
|
m_pendingFiles.clear();
|
||||||
|
m_pendingFiles.squeeze();
|
||||||
|
malloc_trim(0);
|
||||||
|
qDebug()<< "Finish processCache!";
|
||||||
|
return;
|
||||||
|
}
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2021, KylinSoft Co., Ltd.
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#ifndef PENDINGFILEQUEUE_H
|
||||||
|
#define PENDINGFILEQUEUE_H
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
#include <QVector>
|
||||||
|
#include <QTimer>
|
||||||
|
#include <QThread>
|
||||||
|
#include <QMutex>
|
||||||
|
#include "pending-file.h"
|
||||||
|
#include "index-generator.h"
|
||||||
|
|
||||||
|
namespace Zeeker {
|
||||||
|
class PendingFileQueue : public QThread
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
public:
|
||||||
|
static PendingFileQueue *getInstance(QObject *parent = nullptr);
|
||||||
|
|
||||||
|
~PendingFileQueue();
|
||||||
|
//This method will block until current cache has been processed.
|
||||||
|
//Do not do enqueue operation in other thread while this method is running.
|
||||||
|
void forceFinish();
|
||||||
|
void enqueue(const PendingFile& file);
|
||||||
|
QTimer *m_cacheTimer = nullptr;
|
||||||
|
QTimer *m_minProcessTimer = nullptr;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void run() override;
|
||||||
|
Q_SIGNALS:
|
||||||
|
void cacheTimerStart();
|
||||||
|
void minProcessTimerStart();
|
||||||
|
void timerStop();
|
||||||
|
private:
|
||||||
|
void processCache();
|
||||||
|
explicit PendingFileQueue(QObject *parent = nullptr);
|
||||||
|
|
||||||
|
QVector<PendingFile> m_cache;
|
||||||
|
QVector<PendingFile> m_pendingFiles;
|
||||||
|
QMutex m_mutex;
|
||||||
|
QMutex m_timeoutMutex;
|
||||||
|
|
||||||
|
QThread *m_timerThread = nullptr;
|
||||||
|
bool m_timeout = false;
|
||||||
|
int m_enqueuetimes = 0;
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // PENDINGFILEQUEUE_H
|
|
@ -0,0 +1,81 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2021, KylinSoft Co., Ltd.
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "pending-file.h"
|
||||||
|
using namespace Zeeker;
|
||||||
|
/**
 * Creates an entry for @p path with every flag (deleted, modified, is-dir)
 * cleared. The path is stored exactly as given.
 */
PendingFile::PendingFile(const QString &path)
    : m_path(path), m_deleted(false), m_modified(false), m_isDir(false)
{
}
|
||||||
|
|
||||||
|
/// Returns the stored path.
QString PendingFile::path() const
{
    return this->m_path;
}
|
||||||
|
|
||||||
|
void PendingFile::setPath(const QString& path)
|
||||||
|
{
|
||||||
|
if (path.endsWith(QLatin1Char('/'))) {
|
||||||
|
m_path = path.mid(0, m_path.length() - 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
m_path = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
//bool PendingFile::isNewFile() const
|
||||||
|
//{
|
||||||
|
// return m_created;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//bool PendingFile::shouldIndexContents() const
|
||||||
|
//{
|
||||||
|
// if (m_created || m_modified) {
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
// return false;
|
||||||
|
//}
|
||||||
|
|
||||||
|
bool PendingFile::isDir() const
|
||||||
|
{
|
||||||
|
return m_isDir;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PendingFile::shouldRemoveIndex() const
|
||||||
|
{
|
||||||
|
return m_deleted;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PendingFile::merge(const PendingFile& file)
|
||||||
|
{
|
||||||
|
// m_created |= file.m_created;
|
||||||
|
m_modified = file.m_modified;
|
||||||
|
m_deleted = file.m_deleted;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PendingFile::printFlags() const
|
||||||
|
{
|
||||||
|
// qDebug() << "Created:" << m_created;
|
||||||
|
qDebug() << "Deleted:" << m_deleted;
|
||||||
|
qDebug() << "Modified:" << m_modified;
|
||||||
|
qDebug() << "Is dir:" << m_isDir;
|
||||||
|
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2021, KylinSoft Co., Ltd.
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#ifndef PENDINGFILE_H
|
||||||
|
#define PENDINGFILE_H
|
||||||
|
|
||||||
|
#include <QString>
|
||||||
|
#include <QDebug>
|
||||||
|
namespace Zeeker {
|
||||||
|
/**
|
||||||
|
* Represents a file/folder which needs to be indexed.
|
||||||
|
*/
|
||||||
|
class PendingFile
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit PendingFile(const QString& path = QString());
|
||||||
|
|
||||||
|
QString path() const;
|
||||||
|
void setPath(const QString& path);
|
||||||
|
void setIsDir(){ m_isDir = true; }
|
||||||
|
void setModified() { m_modified = true; }
|
||||||
|
// void setCreated() { m_created = true; }
|
||||||
|
void setDeleted() { m_deleted = true; }
|
||||||
|
bool shouldRemoveIndex() const;
|
||||||
|
// bool shouldIndexContents() const;
|
||||||
|
bool isDir() const;
|
||||||
|
|
||||||
|
bool operator == (const PendingFile& rhs) const {
|
||||||
|
return (m_path == rhs.m_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a PendingFile \p file and merges its flags into
|
||||||
|
* the current PendingFile
|
||||||
|
*/
|
||||||
|
void merge(const PendingFile& file);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QString m_path;
|
||||||
|
|
||||||
|
// bool m_created : 1;
|
||||||
|
bool m_deleted : 1;
|
||||||
|
bool m_modified : 1;
|
||||||
|
bool m_isDir : 1;
|
||||||
|
|
||||||
|
void printFlags() const;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // PENDINGFILE_H
|
|
@ -27,7 +27,7 @@ QMutex SearchManager::m_mutex1;
|
||||||
QMutex SearchManager::m_mutex2;
|
QMutex SearchManager::m_mutex2;
|
||||||
QMutex SearchManager::m_mutex3;
|
QMutex SearchManager::m_mutex3;
|
||||||
SearchManager::SearchManager(QObject *parent) : QObject(parent) {
|
SearchManager::SearchManager(QObject *parent) : QObject(parent) {
|
||||||
m_pool.setMaxThreadCount(2);
|
m_pool.setMaxThreadCount(3);
|
||||||
m_pool.setExpiryTimeout(1000);
|
m_pool.setExpiryTimeout(1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,39 +56,54 @@ void SearchManager::onKeywordSearch(QString keyword, QQueue<QString> *searchResu
|
||||||
++uniqueSymbol3;
|
++uniqueSymbol3;
|
||||||
m_mutex3.unlock();
|
m_mutex3.unlock();
|
||||||
|
|
||||||
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
|
// if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
|
||||||
DirectSearch *directSearch;
|
// DirectSearch *directSearch;
|
||||||
directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1);
|
// directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1);
|
||||||
m_pool.start(directSearch);
|
// m_pool.start(directSearch);
|
||||||
} else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
|
// } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
|
||||||
FileSearch *filesearch;
|
// FileSearch *filesearch;
|
||||||
filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5);
|
// filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5);
|
||||||
m_pool.start(filesearch);
|
// m_pool.start(filesearch);
|
||||||
|
|
||||||
FileSearch *dirsearch;
|
// FileSearch *dirsearch;
|
||||||
dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5);
|
// dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5);
|
||||||
m_pool.start(dirsearch);
|
// m_pool.start(dirsearch);
|
||||||
|
|
||||||
FileContentSearch *contentSearch;
|
// FileContentSearch *contentSearch;
|
||||||
contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5);
|
// contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5);
|
||||||
m_pool.start(contentSearch);
|
// m_pool.start(contentSearch);
|
||||||
} else {
|
// } else {
|
||||||
qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast<int>(FileUtils::searchMethod);
|
// qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast<int>(FileUtils::searchMethod);
|
||||||
}
|
// }
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SearchManager::isBlocked(QString &path) {
|
bool SearchManager::isBlocked(QString &path) {
|
||||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||||
for(QString i : blockList) {
|
for(QString i : blockList) {
|
||||||
if(path.startsWith(i.prepend("/")))
|
if(FileUtils::isOrUnder(path, i))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FileSearch::FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
|
bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path)
|
||||||
|
{
|
||||||
|
QFileInfo info(path);
|
||||||
|
if(!info.exists()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ri.icon = FileUtils::getFileIcon(QUrl::fromLocalFile(path).toString());
|
||||||
|
ri.name = info.fileName();
|
||||||
|
ri.description = QVector<SearchPluginIface::DescriptionInfo>() \
|
||||||
|
<< SearchPluginIface::DescriptionInfo{tr("Path:"), path} \
|
||||||
|
<< SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")};
|
||||||
|
ri.actionKey = path;
|
||||||
|
ri.type = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
FileSearch::FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
|
||||||
this->setAutoDelete(true);
|
this->setAutoDelete(true);
|
||||||
m_search_result = searchResult;
|
m_search_result = searchResult;
|
||||||
m_uniqueSymbol = uniqueSymbol;
|
m_uniqueSymbol = uniqueSymbol;
|
||||||
|
@ -178,18 +193,13 @@ int FileSearch::getResult(Xapian::MSet &result) {
|
||||||
if(SearchManager::isBlocked(path)) {
|
if(SearchManager::isBlocked(path)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
SearchPluginIface::ResultInfo ri;
|
||||||
QFileInfo info(path);
|
if(SearchManager::creatResultInfo(ri, path)) {
|
||||||
|
|
||||||
if(!info.exists()) {
|
|
||||||
// pathTobeDelete->append(QString::fromStdString(data));
|
|
||||||
qDebug() << path << "is not exist!!";
|
|
||||||
} else {
|
|
||||||
switch(m_value.toInt()) {
|
switch(m_value.toInt()) {
|
||||||
case 1:
|
case 1:
|
||||||
SearchManager::m_mutex1.lock();
|
SearchManager::m_mutex1.lock();
|
||||||
if(m_uniqueSymbol == SearchManager::uniqueSymbol2) {
|
if(m_uniqueSymbol == SearchManager::uniqueSymbol2) {
|
||||||
m_search_result->enqueue(path);
|
m_search_result->enqueue(ri);
|
||||||
SearchManager::m_mutex1.unlock();
|
SearchManager::m_mutex1.unlock();
|
||||||
} else {
|
} else {
|
||||||
SearchManager::m_mutex1.unlock();
|
SearchManager::m_mutex1.unlock();
|
||||||
|
@ -200,7 +210,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
|
||||||
case 0:
|
case 0:
|
||||||
SearchManager::m_mutex2.lock();
|
SearchManager::m_mutex2.lock();
|
||||||
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
|
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
|
||||||
m_search_result->enqueue(path);
|
m_search_result->enqueue(ri);
|
||||||
SearchManager::m_mutex2.unlock();
|
SearchManager::m_mutex2.unlock();
|
||||||
} else {
|
} else {
|
||||||
SearchManager::m_mutex2.unlock();
|
SearchManager::m_mutex2.unlock();
|
||||||
|
@ -210,8 +220,8 @@ int FileSearch::getResult(Xapian::MSet &result) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// searchResult.append(path);
|
|
||||||
}
|
}
|
||||||
|
// searchResult.append(path);
|
||||||
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
|
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
|
||||||
}
|
}
|
||||||
// if(!pathTobeDelete->isEmpty())
|
// if(!pathTobeDelete->isEmpty())
|
||||||
|
@ -219,7 +229,7 @@ int FileSearch::getResult(Xapian::MSet &result) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
FileContentSearch::FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) {
|
FileContentSearch::FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) {
|
||||||
this->setAutoDelete(true);
|
this->setAutoDelete(true);
|
||||||
m_search_result = searchResult;
|
m_search_result = searchResult;
|
||||||
m_uniqueSymbol = uniqueSymbol;
|
m_uniqueSymbol = uniqueSymbol;
|
||||||
|
@ -280,29 +290,15 @@ int FileContentSearch::keywordSearchContent() {
|
||||||
words.append(sKeyWord.at(i).word).append(" ");
|
words.append(sKeyWord.at(i).word).append(" ");
|
||||||
}
|
}
|
||||||
|
|
||||||
Xapian::Query query = qp.parse_query(words);
|
|
||||||
// Xapian::Query query = qp.parse_query(keyword.toStdString());
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
|
|
||||||
// //Creat a query
|
|
||||||
// std::string words;
|
|
||||||
// for(int i=0;i<sKeyWord.size();i++)
|
|
||||||
// {
|
|
||||||
// words.append(sKeyWord.at(i).word).append(" ");
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// Xapian::Query query = qp.parse_query(words);
|
// Xapian::Query query = qp.parse_query(words);
|
||||||
|
|
||||||
// std::vector<Xapian::Query> v;
|
std::vector<Xapian::Query> v;
|
||||||
// for(int i=0;i<sKeyWord.size();i++)
|
for(int i=0; i<sKeyWord.size(); i++) {
|
||||||
// {
|
v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
||||||
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
qDebug() << QString::fromStdString(sKeyWord.at(i).word);
|
||||||
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
|
}
|
||||||
// }
|
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
||||||
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
|
||||||
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
|
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
|
||||||
|
|
||||||
enquire.set_query(query);
|
enquire.set_query(query);
|
||||||
|
@ -339,15 +335,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
QFileInfo info(path);
|
SearchPluginIface::ResultInfo ri;
|
||||||
|
if(!SearchManager::creatResultInfo(ri, path)) {
|
||||||
if(!info.exists()) {
|
|
||||||
// pathTobeDelete->append(QString::fromStdString(data));
|
|
||||||
qDebug() << path << "is not exist!!";
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Construct snippets containing keyword.
|
// Construct snippets containing keyword.
|
||||||
QStringList snippets;
|
// QStringList snippets;
|
||||||
// snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400)));
|
// snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400)));
|
||||||
// qWarning()<<QString::fromStdString(s);
|
// qWarning()<<QString::fromStdString(s);
|
||||||
auto term = doc.termlist_begin();
|
auto term = doc.termlist_begin();
|
||||||
|
@ -363,13 +356,16 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
|
||||||
} else {
|
} else {
|
||||||
snippet.append("...").prepend("...");
|
snippet.append("...").prepend("...");
|
||||||
}
|
}
|
||||||
snippets.append(snippet);
|
ri.description.prepend(SearchPluginIface::DescriptionInfo{"",snippet});
|
||||||
|
// snippets.append(snippet);
|
||||||
QString().swap(snippet);
|
QString().swap(snippet);
|
||||||
std::string().swap(s);
|
std::string().swap(s);
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
std::string().swap(data);
|
std::string().swap(data);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// for(QString i : QString::fromStdString(keyWord).split(" ",QString::SkipEmptyParts))
|
// for(QString i : QString::fromStdString(keyWord).split(" ",QString::SkipEmptyParts))
|
||||||
// {
|
// {
|
||||||
// std::string word = i.toStdString();
|
// std::string word = i.toStdString();
|
||||||
|
@ -387,10 +383,10 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
|
||||||
|
|
||||||
SearchManager::m_mutex3.lock();
|
SearchManager::m_mutex3.lock();
|
||||||
if(m_uniqueSymbol == SearchManager::uniqueSymbol3) {
|
if(m_uniqueSymbol == SearchManager::uniqueSymbol3) {
|
||||||
m_search_result->enqueue(qMakePair(path, snippets));
|
m_search_result->enqueue(ri);
|
||||||
SearchManager::m_mutex3.unlock();
|
SearchManager::m_mutex3.unlock();
|
||||||
snippets.clear();
|
// snippets.clear();
|
||||||
QStringList().swap(snippets);
|
// QStringList().swap(snippets);
|
||||||
} else {
|
} else {
|
||||||
SearchManager::m_mutex3.unlock();
|
SearchManager::m_mutex3.unlock();
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -403,12 +399,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
DirectSearch::DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol) {
|
DirectSearch::DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol) {
|
||||||
this->setAutoDelete(true);
|
this->setAutoDelete(true);
|
||||||
m_keyword = keyword;
|
m_keyword = keyword;
|
||||||
m_searchResultFile = searchResultFile;
|
m_searchResult = searchResult;
|
||||||
m_searchResultDir = searchResultDir;
|
|
||||||
m_uniqueSymbol = uniqueSymbol;
|
m_uniqueSymbol = uniqueSymbol;
|
||||||
|
m_value = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DirectSearch::run() {
|
void DirectSearch::run() {
|
||||||
|
@ -417,8 +413,13 @@ void DirectSearch::run() {
|
||||||
QFileInfoList list;
|
QFileInfoList list;
|
||||||
QDir dir;
|
QDir dir;
|
||||||
// QDir::Hidden
|
// QDir::Hidden
|
||||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
if(m_value == DIR_SEARCH_VALUE) {
|
||||||
dir.setSorting(QDir::DirsFirst);
|
dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot);
|
||||||
|
} else {
|
||||||
|
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||||
|
dir.setSorting(QDir::DirsFirst);
|
||||||
|
}
|
||||||
|
|
||||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||||
while(!bfs.empty()) {
|
while(!bfs.empty()) {
|
||||||
dir.setPath(bfs.dequeue());
|
dir.setPath(bfs.dequeue());
|
||||||
|
@ -428,7 +429,7 @@ void DirectSearch::run() {
|
||||||
|
|
||||||
bool findIndex = false;
|
bool findIndex = false;
|
||||||
for (QString j : blockList) {
|
for (QString j : blockList) {
|
||||||
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
|
if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) {
|
||||||
findIndex = true;
|
findIndex = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -442,26 +443,22 @@ void DirectSearch::run() {
|
||||||
bfs.enqueue(i.absoluteFilePath());
|
bfs.enqueue(i.absoluteFilePath());
|
||||||
}
|
}
|
||||||
if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) {
|
if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) {
|
||||||
SearchManager::m_mutex1.lock();
|
|
||||||
// qWarning() << i.fileName() << m_keyword;
|
// qWarning() << i.fileName() << m_keyword;
|
||||||
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
|
if(m_searchResult->length() > 49)
|
||||||
// TODO
|
|
||||||
if(i.isDir() && m_searchResultDir->length() < 51) {
|
|
||||||
m_searchResultDir->enqueue(i.absoluteFilePath());
|
|
||||||
} else if(m_searchResultFile->length() < 51) {
|
|
||||||
m_searchResultFile->enqueue(i.absoluteFilePath());
|
|
||||||
}
|
|
||||||
SearchManager::m_mutex1.unlock();
|
|
||||||
if(m_searchResultDir->length() > 49 && m_searchResultFile->length() > 49) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// TODO
|
|
||||||
// More suitable method?
|
|
||||||
m_searchResultFile->clear();
|
|
||||||
m_searchResultDir->clear();
|
|
||||||
SearchManager::m_mutex1.unlock();
|
|
||||||
return;
|
return;
|
||||||
|
if((i.isDir() && m_value == DIR_SEARCH_VALUE) || (i.isFile() && m_value == FILE_SEARCH_VALUE)) {
|
||||||
|
SearchPluginIface::ResultInfo ri;
|
||||||
|
if(SearchManager::creatResultInfo(ri,i.absoluteFilePath())) {
|
||||||
|
SearchManager::m_mutex1.lock();
|
||||||
|
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
|
||||||
|
m_searchResult->enqueue(ri);
|
||||||
|
SearchManager::m_mutex1.unlock();
|
||||||
|
} else {
|
||||||
|
SearchManager::m_mutex1.unlock();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,19 +37,19 @@
|
||||||
#include <QThread>
|
#include <QThread>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
|
|
||||||
|
#include "search-plugin-iface.h"
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||||
|
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
|
|
||||||
class LIBSEARCH_EXPORT SearchManager : public QObject {
|
class LIBSEARCH_EXPORT SearchManager : public QObject {
|
||||||
friend class FileSearch;
|
friend class FileSearch;
|
||||||
friend class FileContentSearch;
|
friend class FileContentSearch;
|
||||||
|
friend class DirectSearch;
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
explicit SearchManager(QObject *parent = nullptr);
|
explicit SearchManager(QObject *parent = nullptr);
|
||||||
|
@ -72,33 +72,15 @@ Q_SIGNALS:
|
||||||
void resultDir(QQueue<QString> *);
|
void resultDir(QQueue<QString> *);
|
||||||
void resultContent(QQueue<QPair<QString, QStringList>> *);
|
void resultContent(QQueue<QPair<QString, QStringList>> *);
|
||||||
private:
|
private:
|
||||||
// int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
|
|
||||||
// int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief SearchManager::creatQueryForFileSearch
|
|
||||||
* This part shall be optimized frequently to provide a more stable search function.
|
|
||||||
* @param keyword
|
|
||||||
* @param db
|
|
||||||
* @return Xapian::Query
|
|
||||||
*/
|
|
||||||
// Xapian::Query creatQueryForFileSearch(QString keyword, Xapian::Database &db);
|
|
||||||
// Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db);
|
|
||||||
|
|
||||||
// int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value);
|
|
||||||
// int getContentResult(size_t uniqueSymbol, Xapian::MSet &result,std::string &keyWord);
|
|
||||||
|
|
||||||
static bool isBlocked(QString &path);
|
static bool isBlocked(QString &path);
|
||||||
|
static bool creatResultInfo(Zeeker::SearchPluginIface::ResultInfo &ri, QString path);
|
||||||
|
|
||||||
// QQueue<QString> *m_search_result_file = nullptr;
|
|
||||||
// QQueue<QString> *m_search_result_dir = nullptr;
|
|
||||||
// QQueue<QPair<QString,QStringList>> *m_search_result_content = nullptr;
|
|
||||||
QThreadPool m_pool;
|
QThreadPool m_pool;
|
||||||
};
|
};
|
||||||
|
|
||||||
class FileSearch : public QRunnable {
|
class FileSearch : public QRunnable {
|
||||||
public:
|
public:
|
||||||
explicit FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
|
explicit FileSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
|
||||||
~FileSearch();
|
~FileSearch();
|
||||||
protected:
|
protected:
|
||||||
void run();
|
void run();
|
||||||
|
@ -107,7 +89,7 @@ private:
|
||||||
Xapian::Query creatQueryForFileSearch(Xapian::Database &db);
|
Xapian::Query creatQueryForFileSearch(Xapian::Database &db);
|
||||||
int getResult(Xapian::MSet &result);
|
int getResult(Xapian::MSet &result);
|
||||||
|
|
||||||
QQueue<QString> *m_search_result = nullptr;
|
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
|
||||||
QString m_value;
|
QString m_value;
|
||||||
unsigned m_slot = 1;
|
unsigned m_slot = 1;
|
||||||
size_t m_uniqueSymbol;
|
size_t m_uniqueSymbol;
|
||||||
|
@ -118,7 +100,7 @@ private:
|
||||||
|
|
||||||
class FileContentSearch : public QRunnable {
|
class FileContentSearch : public QRunnable {
|
||||||
public:
|
public:
|
||||||
explicit FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
|
explicit FileContentSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
|
||||||
~FileContentSearch();
|
~FileContentSearch();
|
||||||
protected:
|
protected:
|
||||||
void run();
|
void run();
|
||||||
|
@ -126,7 +108,7 @@ private:
|
||||||
int keywordSearchContent();
|
int keywordSearchContent();
|
||||||
int getResult(Xapian::MSet &result, std::string &keyWord);
|
int getResult(Xapian::MSet &result, std::string &keyWord);
|
||||||
|
|
||||||
QQueue<QPair<QString, QStringList>> *m_search_result = nullptr;
|
DataQueue<SearchPluginIface::ResultInfo> *m_search_result = nullptr;
|
||||||
size_t m_uniqueSymbol;
|
size_t m_uniqueSymbol;
|
||||||
QString m_keyword;
|
QString m_keyword;
|
||||||
int m_begin = 0;
|
int m_begin = 0;
|
||||||
|
@ -135,14 +117,14 @@ private:
|
||||||
|
|
||||||
class DirectSearch : public QRunnable {
|
class DirectSearch : public QRunnable {
|
||||||
public:
|
public:
|
||||||
explicit DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol);
|
explicit DirectSearch(QString keyword, DataQueue<SearchPluginIface::ResultInfo> *searchResult, QString value, size_t uniqueSymbol);
|
||||||
protected:
|
protected:
|
||||||
void run();
|
void run();
|
||||||
private:
|
private:
|
||||||
QString m_keyword;
|
QString m_keyword;
|
||||||
QQueue<QString>* m_searchResultFile = nullptr;
|
DataQueue<SearchPluginIface::ResultInfo>* m_searchResult = nullptr;
|
||||||
QQueue<QString>* m_searchResultDir = nullptr;
|
|
||||||
size_t m_uniqueSymbol;
|
size_t m_uniqueSymbol;
|
||||||
|
QString m_value;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,49 @@
|
||||||
#include "searchmethodmanager.h"
|
#include "searchmethodmanager.h"
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
|
SearchMethodManager::SearchMethodManager()
|
||||||
|
{
|
||||||
|
m_iw = InotifyWatch::getInstance(HOME_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
|
void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
|
||||||
qWarning() << "searchMethod start: " << static_cast<int>(sm);
|
qWarning() << "searchMethod start: " << static_cast<int>(sm);
|
||||||
if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) {
|
if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) {
|
||||||
FileUtils::searchMethod = sm;
|
FileUtils::searchMethod = sm;
|
||||||
} else {
|
} else {
|
||||||
printf("enum class error!!!\n");
|
|
||||||
qWarning("enum class error!!!\n");
|
qWarning("enum class error!!!\n");
|
||||||
}
|
}
|
||||||
if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) {
|
if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) {
|
||||||
|
|
||||||
|
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
|
||||||
|
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
|
||||||
|
if(!fifoDir.exists())
|
||||||
|
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
|
||||||
|
|
||||||
|
unlink(UKUI_SEARCH_PIPE_PATH);
|
||||||
|
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
|
||||||
|
if(retval == -1) {
|
||||||
|
qCritical() << "creat fifo error!!";
|
||||||
|
syslog(LOG_ERR, "creat fifo error!!\n");
|
||||||
|
assert(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
qDebug() << "create fifo success\n";
|
||||||
qWarning() << "start first index";
|
qWarning() << "start first index";
|
||||||
// m_fi = FirstIndex("/home/zhangzihao/Desktop");
|
|
||||||
m_fi.start();
|
m_fi.start();
|
||||||
qWarning() << "start inotify index";
|
qWarning() << "start inotify index";
|
||||||
// InotifyIndex ii("/home");
|
// InotifyIndex ii("/home");
|
||||||
// ii.start();
|
// ii.start();
|
||||||
this->m_ii = InotifyIndex::getInstance("/home");
|
// this->m_ii = InotifyIndex::getInstance("/home");
|
||||||
if(!this->m_ii->isRunning()) {
|
// if(!this->m_ii->isRunning()) {
|
||||||
this->m_ii->start();
|
// this->m_ii->start();
|
||||||
|
// }
|
||||||
|
if(!this->m_iw->isRunning()) {
|
||||||
|
this->m_iw->start();
|
||||||
}
|
}
|
||||||
qDebug() << "Search method has been set to INDEXSEARCH";
|
qDebug() << "Search method has been set to INDEXSEARCH";
|
||||||
}
|
}
|
||||||
|
if(FileUtils::SearchMethod::DIRECTSEARCH == sm) {
|
||||||
|
m_iw->stopWatch();
|
||||||
|
}
|
||||||
qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod);
|
qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,15 +2,17 @@
|
||||||
#define SEARCHMETHODMANAGER_H
|
#define SEARCHMETHODMANAGER_H
|
||||||
|
|
||||||
#include "first-index.h"
|
#include "first-index.h"
|
||||||
#include "inotify-index.h"
|
//#include "inotify-index.h"
|
||||||
|
#include "inotify-watch.h"
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
class SearchMethodManager {
|
class SearchMethodManager {
|
||||||
public:
|
public:
|
||||||
SearchMethodManager() = default;
|
SearchMethodManager();
|
||||||
void searchMethod(FileUtils::SearchMethod sm);
|
void searchMethod(FileUtils::SearchMethod sm);
|
||||||
private:
|
private:
|
||||||
FirstIndex m_fi;
|
FirstIndex m_fi;
|
||||||
InotifyIndex* m_ii;
|
// InotifyIndex* m_ii;
|
||||||
|
InotifyWatch *m_iw = nullptr;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,5 +42,15 @@ void UkuiSearchQDBus::setInotifyMaxUserWatches() {
|
||||||
// sysctl
|
// sysctl
|
||||||
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
|
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
|
||||||
// /etc/sysctl.conf
|
// /etc/sysctl.conf
|
||||||
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
|
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
|
||||||
|
}
|
||||||
|
|
||||||
|
int UkuiSearchQDBus::addInotifyUserInstances(int addNum)
|
||||||
|
{
|
||||||
|
QDBusReply<int> reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum);
|
||||||
|
if(reply.isValid()) {
|
||||||
|
qDebug() << "Set inotify_max_user_instances to" << reply.value();
|
||||||
|
} else {
|
||||||
|
qWarning() << "Call AddInotifyMaxUserInstance failed!";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,14 @@
|
||||||
#define UKUISEARCHQDBUS_H
|
#define UKUISEARCHQDBUS_H
|
||||||
|
|
||||||
#include <QDBusInterface>
|
#include <QDBusInterface>
|
||||||
|
#include <QDBusReply>
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
class UkuiSearchQDBus {
|
class UkuiSearchQDBus {
|
||||||
public:
|
public:
|
||||||
UkuiSearchQDBus();
|
UkuiSearchQDBus();
|
||||||
~UkuiSearchQDBus();
|
~UkuiSearchQDBus();
|
||||||
void setInotifyMaxUserWatches();
|
void setInotifyMaxUserWatches();
|
||||||
|
int addInotifyUserInstances(int addNum);
|
||||||
private:
|
private:
|
||||||
QDBusInterface* tmpSystemQDBusInterface;
|
QDBusInterface* tmpSystemQDBusInterface;
|
||||||
};
|
};
|
||||||
|
|
|
@ -26,12 +26,15 @@
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
#include "global-settings.h"
|
#include "global-settings.h"
|
||||||
|
|
||||||
|
#include "plugininterface/search-plugin-iface.h"
|
||||||
|
#include "plugininterface/data-queue.h"
|
||||||
#include "index/searchmethodmanager.h"
|
#include "index/searchmethodmanager.h"
|
||||||
#include "index/first-index.h"
|
#include "index/first-index.h"
|
||||||
#include "index/ukui-search-qdbus.h"
|
#include "index/ukui-search-qdbus.h"
|
||||||
#include "index/inotify-index.h"
|
#include "index/inotify-index.h"
|
||||||
#include "index/search-manager.h"
|
#include "index/search-manager.h"
|
||||||
|
|
||||||
|
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
//class LIBSEARCH_EXPORT GlobalSearch {
|
//class LIBSEARCH_EXPORT GlobalSearch {
|
||||||
//public:
|
//public:
|
||||||
|
|
|
@ -24,16 +24,17 @@ DEFINES += PLUGIN_INSTALL_DIRS='\\"$${PLUGIN_INSTALL_DIRS}\\"'
|
||||||
# In order to do so, uncomment the following line.
|
# In order to do so, uncomment the following line.
|
||||||
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
||||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||||
|
include(pluginmanage/plugin-manager.pri)
|
||||||
|
include(plugininterface/plugin-interface.pri)
|
||||||
include(index/index.pri)
|
include(index/index.pri)
|
||||||
include(parser/parser.pri))
|
include(parser/parser.pri))
|
||||||
include(appsearch/appsearch.pri)
|
include(appsearch/appsearch.pri)
|
||||||
include(settingsearch/settingsearch.pri))
|
include(settingsearch/settingsearch.pri))
|
||||||
include(plugininterface/plugin-interface.pri)
|
|
||||||
include(pluginmanage/plugin-manager.pri)
|
|
||||||
|
|
||||||
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
|
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
|
||||||
LIBS += -lxapian -lquazip5 -luchardet
|
LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
file-utils.cpp \
|
file-utils.cpp \
|
||||||
|
@ -52,13 +53,13 @@ HEADERS += \
|
||||||
RESOURCES += \
|
RESOURCES += \
|
||||||
resource1.qrc \
|
resource1.qrc \
|
||||||
|
|
||||||
#TRANSLATIONS += \
|
TRANSLATIONS += \
|
||||||
# ../translations/libsearch/libukui-search_zh_CN.ts
|
../translations/libukui-search/libukui-search_zh_CN.ts
|
||||||
|
|
||||||
#qm_files.path = /usr/share/ukui-search/translations/libsearch/
|
qm_files.path = /usr/share/ukui-search/translations/
|
||||||
#qm_files.files = $$OUT_PWD/.qm/*.qm
|
qm_files.files = $$OUT_PWD/.qm/*.qm
|
||||||
|
|
||||||
#INSTALLS += qm_files
|
INSTALLS += qm_files
|
||||||
|
|
||||||
|
|
||||||
# Default rules for deployment.
|
# Default rules for deployment.
|
||||||
|
@ -67,11 +68,9 @@ unix {
|
||||||
INSTALLS += target
|
INSTALLS += target
|
||||||
|
|
||||||
header.path = /usr/include/ukui-search
|
header.path = /usr/include/ukui-search
|
||||||
header.files += *.h index/*.h appsearch/*.h settingsearch/*.h
|
header.files += *.h index/*.h appsearch/*.h settingsearch/*.h plugininterface/*.h
|
||||||
INSTALLS += header
|
INSTALLS += header
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
INCLUDEPATH += $$PWD/../libchinese-segmentation
|
INCLUDEPATH += $$PWD/../libchinese-segmentation
|
||||||
DEPENDPATH += $$PWD/../libchinese-segmentation
|
DEPENDPATH += $$PWD/../libchinese-segmentation
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
INCLUDEPATH += $$PWD
|
||||||
|
INCLUDEPATH += $$PWD/plugininterface
|
||||||
|
INCLUDEPATH += $$PWD/index
|
||||||
|
INCLUDEPATH += $$PWD/parser
|
||||||
|
INCLUDEPATH += $$PWD/pluginmanage
|
||||||
|
INCLUDEPATH += $$PWD/settingsearch
|
||||||
|
INCLUDEPATH += $$PWD/appsearch
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue