diff --git a/.gitignore b/.gitignore index fab7372..6e844ee 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,6 @@ *.so.* *_pch.h.cpp *_resource.rc -*.qm .#* *.*# core diff --git a/README.md b/README.md index c00ecf2..d375118 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,17 @@ # ukui-search -[WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment. +[dWIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment. + +Build from source + + + git clone https://github.com/ukui/ukui-search.git + + cd ukui-search && mkdir build && cd build + + qmake .. && make + + sudo make install + + /usr/bin/ukui-search diff --git a/data/ukui-search-menu.desktop b/data/ukui-search-menu.desktop index 07b5690..085b75c 100644 --- a/data/ukui-search-menu.desktop +++ b/data/ukui-search-menu.desktop @@ -9,6 +9,7 @@ Exec=/usr/bin/ukui-search -s Type=Application Icon=kylin-search X-UKUI-AutoRestart=true +NoDisplay=true OnlyShowIn=UKUI X-UKUI-Autostart-Phase=Application Terminal=false diff --git a/debian/changelog b/debian/changelog index 7f17c1f..28cde68 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +ukui-search (0.4.0+0530) v101; urgency=medium + + * Bug 57129 + * 任务 无 + + -- zhangpengfei Sun, 30 May 2021 11:21:37 +0800 + ukui-search (0.4.0+0520) v101; urgency=medium * Bug 55034,55545,55326,55496 diff --git a/frontend/control/stack-pages/home-page.cpp b/frontend/control/stack-pages/home-page.cpp index ae57e8a..a616c07 100644 --- a/frontend/control/stack-pages/home-page.cpp +++ b/frontend/control/stack-pages/home-page.cpp @@ -130,10 +130,10 @@ void HomePage::createSection(const QString §ion_name, const HomePageItemShap this->appendSection(section); connect(section, &HomePageSection::requestAction, this, [ = ](const QString &key, const QString &action, const QString &pluginId) { SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(pluginId); - if (plugin) { - plugin->openAction(action, key); - 
} else { - qWarning()<<"Get plugin failed!"; - } +// if (plugin) { +// plugin->openAction(action, key); +// } else { +// qWarning()<<"Get plugin failed!"; +// } }); } diff --git a/frontend/control/stack-pages/search-page-section.cpp b/frontend/control/stack-pages/search-page-section.cpp index aece8cb..07ad05f 100644 --- a/frontend/control/stack-pages/search-page-section.cpp +++ b/frontend/control/stack-pages/search-page-section.cpp @@ -27,11 +27,11 @@ using namespace Zeeker; #define DETAIL_BACKGROUND_COLOR QColor(0, 0, 0, 0) #define DETAIL_WIDGET_TRANSPARENT 0.04 #define DETAIL_WIDGET_BORDER_RADIUS 4 -#define DETAIL_WIDGET_MARGINS 8,40,40,8 -#define DETAIL_FRAME_MARGINS 8,0,8,0 +#define DETAIL_WIDGET_MARGINS 8,0,8,0 +#define DETAIL_FRAME_MARGINS 8,0,0,0 #define DETAIL_ICON_HEIGHT 120 #define NAME_LABEL_WIDTH 280 -#define ICON_SIZE QSize(96, 96) +#define ICON_SIZE QSize(120, 120) #define LINE_STYLE "QFrame{background: rgba(0,0,0,0.2);}" #define ACTION_NORMAL_COLOR QColor(55, 144, 250, 255) #define ACTION_HOVER_COLOR QColor(64, 169, 251, 255) @@ -138,8 +138,18 @@ QString escapeHtml(const QString & str) { void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginIface::ResultInfo &info) { - m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE))); - m_iconLabel->show(); + clearLayout(m_descFrameLyt); + clearLayout(m_previewFrameLyt); + if(SearchPluginManager::getInstance()->getPlugin(plugin_name)->isPreviewEnable(info.actionKey,info.type)) { + m_iconLabel->hide(); + m_previewFrameLyt->addWidget(SearchPluginManager::getInstance()->getPlugin(plugin_name)->previewPage(info.actionKey,info.type, m_previewFrame), 0 , Qt::AlignHCenter); + m_previewFrameLyt->setContentsMargins(0,0,0,0); + m_previewFrame->show(); + } else { + m_previewFrame->hide(); + m_iconLabel->setPixmap(info.icon.pixmap(info.icon.actualSize(ICON_SIZE))); + m_iconLabel->show(); + } QFontMetrics fontMetrics = m_nameLabel->fontMetrics(); QString name = 
fontMetrics.elidedText(info.name, Qt::ElideRight, NAME_LABEL_WIDTH - 8); m_nameLabel->setText(QString("

%1

").arg(escapeHtml(name))); @@ -147,6 +157,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI m_pluginLabel->setText(plugin_name); m_nameFrame->show(); m_line_1->show(); + if (info.description.length() > 0) { //NEW_TODO 样式待优化 clearLayout(m_descFrameLyt); @@ -154,7 +165,7 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI QLabel * descLabel = new QLabel(m_descFrame); descLabel->setTextFormat(Qt::PlainText); descLabel->setWordWrap(true); - QString show_desc = desc.key + ": " + desc.value; + QString show_desc = desc.key + " " + desc.value; descLabel->setText(show_desc); m_descFrameLyt->addWidget(descLabel); } @@ -162,8 +173,8 @@ void DetailWidget::setWidgetInfo(const QString &plugin_name, const SearchPluginI m_line_2->show(); } clearLayout(m_actionFrameLyt); - Q_FOREACH (auto action, info.actionList) { - ActionLabel * actionLabel = new ActionLabel(action, info.key, plugin_name, m_actionFrame); + Q_FOREACH (SearchPluginIface::Actioninfo actioninfo, SearchPluginManager::getInstance()->getPlugin(plugin_name)->getActioninfo(info.type)) { + ActionLabel * actionLabel = new ActionLabel(actioninfo.displayName, info.actionKey, actioninfo.actionkey, plugin_name, info.type, m_actionFrame); m_actionFrameLyt->addWidget(actionLabel); } m_actionFrame->show(); @@ -189,6 +200,8 @@ void DetailWidget::initUi() m_iconLabel = new QLabel(this); m_iconLabel->setFixedHeight(DETAIL_ICON_HEIGHT); m_iconLabel->setAlignment(Qt::AlignCenter); + m_previewFrame = new QFrame(this); + m_previewFrameLyt = new QHBoxLayout(m_previewFrame); m_nameFrame = new QFrame(this); m_nameFrameLyt = new QHBoxLayout(m_nameFrame); @@ -222,6 +235,7 @@ void DetailWidget::initUi() m_actionFrameLyt->setContentsMargins(DETAIL_FRAME_MARGINS); m_mainLyt->addWidget(m_iconLabel); + m_mainLyt->addWidget(m_previewFrame, 0, Qt::AlignHCenter); m_mainLyt->addWidget(m_nameFrame); m_mainLyt->addWidget(m_line_1); m_mainLyt->addWidget(m_descFrame); @@ -248,7 +262,7 @@ 
void DetailWidget::paintEvent(QPaintEvent * event) void DetailWidget::clearLayout(QLayout *layout) { - if(! layout) return; + if(!layout) return; QLayoutItem * child; while((child = layout->takeAt(0)) != 0) { if(child->widget()) { @@ -259,11 +273,13 @@ void DetailWidget::clearLayout(QLayout *layout) child = NULL; } -ActionLabel::ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent) : QLabel(parent) +ActionLabel::ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type, QWidget *parent) : QLabel(parent) { m_action = action; m_key = key; - m_plugin = plugin; + m_actionKey = ActionKey; + m_type = type; + m_pluginId = pluginId; this->initUi(); this->installEventFilter(this); } @@ -287,9 +303,9 @@ bool ActionLabel::eventFilter(QObject *watched, QEvent *event) this->setForegroundRole(QPalette::Dark); return true; } else if(event->type() == QEvent::MouseButtonRelease) { - SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin); + SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_pluginId); if (plugin) - plugin->openAction(m_action, m_key); + plugin->openAction(m_actionKey, m_key, m_type); else qWarning()<<"Get plugin failed!"; this->setForegroundRole(QPalette::Light); diff --git a/frontend/control/stack-pages/search-page-section.h b/frontend/control/stack-pages/search-page-section.h index 86016fa..2c615be 100644 --- a/frontend/control/stack-pages/search-page-section.h +++ b/frontend/control/stack-pages/search-page-section.h @@ -25,7 +25,7 @@ #include #include #include "result-view.h" -#include "plugininterface/search-plugin-iface.h" +#include "search-plugin-iface.h" namespace Zeeker { class ResultArea : public QScrollArea @@ -70,6 +70,8 @@ private: void clearLayout(QLayout *); QVBoxLayout * m_mainLyt = nullptr; QLabel * m_iconLabel = nullptr; + QFrame *m_previewFrame = nullptr; + QHBoxLayout *m_previewFrameLyt = 
nullptr; QFrame * m_nameFrame = nullptr; QHBoxLayout * m_nameFrameLyt = nullptr; QLabel * m_nameLabel = nullptr; @@ -100,13 +102,15 @@ class ActionLabel : public QLabel { Q_OBJECT public: - ActionLabel(const QString &action, const QString &key, const QString &plugin, QWidget *parent = nullptr); + ActionLabel(const QString &action, const QString &key, const int &ActionKey, const QString &pluginId, const int type = 0, QWidget *parent = nullptr); ~ActionLabel() = default; private: void initUi(); QString m_action; QString m_key; - QString m_plugin; + int m_actionKey; + int m_type = 0; + QString m_pluginId; protected: bool eventFilter(QObject *, QEvent *); diff --git a/frontend/control/stack-pages/search-page.cpp b/frontend/control/stack-pages/search-page.cpp index 5542b74..110ee38 100644 --- a/frontend/control/stack-pages/search-page.cpp +++ b/frontend/control/stack-pages/search-page.cpp @@ -21,8 +21,8 @@ #include "search-page.h" using namespace Zeeker; -#define RESULT_WIDTH 240 -#define DETAIL_WIDTH 400 +#define RESULT_WIDTH 266 +#define DETAIL_WIDTH 374 SearchPage::SearchPage(QWidget *parent) : QWidget(parent) { @@ -54,6 +54,7 @@ void SearchPage::appendPlugin(const QString &plugin_id) void SearchPage::initUi() { m_splitter = new QSplitter(this); + m_splitter->setContentsMargins(0, 0, 0, 0); m_resultArea = new ResultArea(m_splitter); m_detailArea = new DetailArea(m_splitter); m_splitter->addWidget(m_resultArea); diff --git a/frontend/frontend.pro b/frontend/frontend.pro index 7922326..93281f8 100644 --- a/frontend/frontend.pro +++ b/frontend/frontend.pro @@ -20,13 +20,14 @@ DEFINES += QT_DEPRECATED_WARNINGS # In order to do so, uncomment the following line. # You can also select to disable deprecated APIs only up to a certain version of Qt. 
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 - +include(../libsearch/libukui-search-headers.pri) include(control/control.pri) include(model/model.pri) include(xatom/xatom.pri) include(singleapplication/qt-single-application.pri) include(view/view.pri) + SOURCES += \ main.cpp \ mainwindow.cpp diff --git a/frontend/main.cpp b/frontend/main.cpp index c703f26..df3571b 100644 --- a/frontend/main.cpp +++ b/frontend/main.cpp @@ -253,6 +253,14 @@ int main(int argc, char *argv[]) { qDebug() << "Load translations file" << QLocale() << "failed!"; } + QTranslator lib_translator; + try { + if(! lib_translator.load("/usr/share/ukui-search/translations/libukui-search_" + QLocale::system().name())) throw - 1; + app.installTranslator(&lib_translator); + } catch(...) { + qDebug() << "Load translations file" << QLocale() << "failed!"; + } + //set main window to the center of screen MainWindow *w = new MainWindow; qApp->setWindowIcon(QIcon::fromTheme("kylin-search")); @@ -275,7 +283,7 @@ int main(int argc, char *argv[]) { QObject::connect(&app, &QtSingleApplication::messageReceived, w, &MainWindow::bootOptionsFilter); // Start app search thread - AppMatch::getAppMatch()->start(); +// AppMatch::getAppMatch()->start(); // NEW_TODO // Set threads which in global thread pool expiry time in 5ms, some prolems here diff --git a/frontend/mainwindow.cpp b/frontend/mainwindow.cpp index 754db57..5ec49f4 100644 --- a/frontend/mainwindow.cpp +++ b/frontend/mainwindow.cpp @@ -37,17 +37,17 @@ #include "qt-single-application.h" #include "global-settings.h" -#define MAIN_MARGINS 16,8,16,16 +#define MAIN_MARGINS 16,16,16,16 #define TITLE_MARGINS 0,0,0,0 #define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings" #define SEARCH_METHOD_KEY "indexSearch" #define WEB_ENGINE_KEY "webEngine" -#define WINDOW_WIDTH 640 -#define WINDOW_HEIGHT 590 +#define WINDOW_WIDTH 680 +#define WINDOW_HEIGHT 600 #define TITLE_HEIGHT 40 #define WINDOW_ICON_SIZE 24 #define 
SETTING_BTN_SIZE 30 -#define SEARCH_BAR_SIZE 44 +#define SEARCH_BAR_SIZE 48 #define ASK_INDEX_TIME 5*1000 #define RESEARCH_TIME 10*1000 @@ -95,7 +95,7 @@ MainWindow::MainWindow(QWidget *parent) : //NEW_TODO, register plugins // SearchPluginManager::getInstance()->registerPlugin(\\); // m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds()); - m_stackedWidget->setPlugins(QStringList()<<"File"<<"Folder"); + m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds()); } MainWindow::~MainWindow() { @@ -140,25 +140,25 @@ void MainWindow::initUi() { mainlayout->setContentsMargins(MAIN_MARGINS); m_frame->setLayout(mainlayout); - m_titleFrame = new QFrame(m_frame);//标题栏 - m_titleFrame->setFixedHeight(TITLE_HEIGHT); - m_titleLyt = new QHBoxLayout(m_titleFrame); - m_titleLyt->setContentsMargins(TITLE_MARGINS); - m_iconLabel = new QLabel(m_titleFrame); - m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE); - m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); - m_titleLabel = new QLabel(m_titleFrame); - m_titleLabel->setText(tr("Search")); - m_settingsBtn = new QPushButton(m_titleFrame); - m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE); - m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic")); - m_settingsBtn->setProperty("useIconHighlightEffect", 0x2); - m_settingsBtn->setProperty("isWindowButton", 0x01); - m_settingsBtn->setFlat(true); - m_titleLyt->addWidget(m_iconLabel); - m_titleLyt->addWidget(m_titleLabel); - m_titleLyt->addStretch(); - m_titleLyt->addWidget(m_settingsBtn); +// m_titleFrame = new QFrame(m_frame);//标题栏 +// m_titleFrame->setFixedHeight(TITLE_HEIGHT); +// m_titleLyt = new QHBoxLayout(m_titleFrame); +// m_titleLyt->setContentsMargins(TITLE_MARGINS); +// m_iconLabel = new QLabel(m_titleFrame); +// m_iconLabel->setFixedSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE); +// 
m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); +// m_titleLabel = new QLabel(m_titleFrame); +// m_titleLabel->setText(tr("Search")); +// m_settingsBtn = new QPushButton(m_titleFrame); +// m_settingsBtn->setFixedSize(SETTING_BTN_SIZE, SETTING_BTN_SIZE); +// m_settingsBtn->setIcon(QIcon::fromTheme("document-properties-symbolic")); +// m_settingsBtn->setProperty("useIconHighlightEffect", 0x2); +// m_settingsBtn->setProperty("isWindowButton", 0x01); +// m_settingsBtn->setFlat(true); +// m_titleLyt->addWidget(m_iconLabel); +// m_titleLyt->addWidget(m_titleLabel); +// m_titleLyt->addStretch(); +// m_titleLyt->addWidget(m_settingsBtn); m_stackedWidget = new StackedWidget(m_frame);//内容栏 m_searchWidget = new SeachBarWidget(this); @@ -166,9 +166,9 @@ void MainWindow::initUi() { m_searchWidget->setLayout(m_searchLayout); m_searchWidget->setFixedHeight(SEARCH_BAR_SIZE); - mainlayout->addWidget(m_titleFrame); - mainlayout->addWidget(m_stackedWidget); +// mainlayout->addWidget(m_titleFrame); mainlayout->addWidget(m_searchWidget); + mainlayout->addWidget(m_stackedWidget); //创建索引询问弹窗 m_askDialog = new CreateIndexAskDialog(this); @@ -197,9 +197,9 @@ void MainWindow::initConnections() }); connect(m_settingsBtn, &QPushButton::clicked, this, &MainWindow::settingsBtnClickedSlot); //主题改变时,更新自定义标题栏的图标 - connect(qApp, &QApplication::paletteChanged, this, [ = ]() { - m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); - }); +// connect(qApp, &QApplication::paletteChanged, this, [ = ]() { +// m_iconLabel->setPixmap(QIcon::fromTheme("kylin-search").pixmap(QSize(WINDOW_ICON_SIZE, WINDOW_ICON_SIZE))); +// }); connect(m_searchLayout, &SearchBarHLayout::requestSearchKeyword, this, &MainWindow::searchKeywordSlot); connect(m_stackedWidget, &StackedWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord); } diff --git 
a/frontend/model/search-result-manager.cpp b/frontend/model/search-result-manager.cpp index 2dfcdb2..fbff1dc 100644 --- a/frontend/model/search-result-manager.cpp +++ b/frontend/model/search-result-manager.cpp @@ -24,7 +24,7 @@ using namespace Zeeker; SearchResultManager::SearchResultManager(const QString& plugin_id, QObject *parent) : QObject(parent) { m_plugin_id = plugin_id; - m_result_queue = new QQueue; + m_result_queue = new DataQueue; m_get_result_thread = new ReceiveResultThread(m_result_queue); initConnections(); } @@ -39,59 +39,59 @@ void SearchResultManager::startSearch(const QString &keyword) } m_result_queue->clear(); SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); -// plugin->KeywordSearch(keyword, m_result_queue); + plugin->KeywordSearch(keyword, m_result_queue); /*********************测试用数据*********************/ - SearchPluginIface::ResultInfo test_info; - if (m_plugin_id == "File") { - test_info.icon = QIcon::fromTheme("ukui-control-center"); - test_info.name = "搜索"; - QVector desc; - SearchPluginIface::DescriptionInfo desc_1; - desc_1.key = "描述"; - desc_1.value = "控制面板搜索插件"; - desc.append(desc_1); - QStringList actions; - actions.append("打开"); - test_info.description = desc; - test_info.actionList = actions; - m_result_queue->append(test_info); - } else { - test_info.icon = QIcon::fromTheme("unknown"); - test_info.name = "文件12345abcde.txt"; - QVector desc; - SearchPluginIface::DescriptionInfo desc_1; - SearchPluginIface::DescriptionInfo desc_2; - desc_1.key = "描述"; - desc_1.value = "一个文件"; - desc_2.key = "路径"; - desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt"; - desc.append(desc_1); - desc.append(desc_2); - QStringList actions; - actions.append("打开"); - actions.append("复制路径"); - test_info.description = desc; - test_info.actionList = actions; - SearchPluginIface::ResultInfo test_info_1 = test_info; - test_info_1.name = "文件1"; - SearchPluginIface::ResultInfo test_info_2 = test_info; - test_info_2.name 
= "文件2"; - SearchPluginIface::ResultInfo test_info_3 = test_info; - test_info_3.name = "文件3"; - SearchPluginIface::ResultInfo test_info_4 = test_info; - test_info_4.name = "文件4"; - SearchPluginIface::ResultInfo test_info_5 = test_info; - test_info_5.name = "文件5"; - SearchPluginIface::ResultInfo test_info_6 = test_info; - test_info_6.name = "文件6"; - m_result_queue->append(test_info); - m_result_queue->append(test_info_1); - m_result_queue->append(test_info_2); - m_result_queue->append(test_info_3); - m_result_queue->append(test_info_4); - m_result_queue->append(test_info_5); - m_result_queue->append(test_info_6); - } +// SearchPluginIface::ResultInfo test_info; +// if (m_plugin_id == "File") { +// test_info.icon = QIcon::fromTheme("ukui-control-center"); +// test_info.name = "搜索"; +// QVector desc; +// SearchPluginIface::DescriptionInfo desc_1; +// desc_1.key = "描述"; +// desc_1.value = "控制面板搜索插件"; +// desc.append(desc_1); +// QStringList actions; +// actions.append("打开"); +// test_info.description = desc; +// test_info.actionList = actions; +// m_result_queue->append(test_info); +// } else { +// test_info.icon = QIcon::fromTheme("unknown"); +// test_info.name = "文件12345abcde.txt"; +// QVector desc; +// SearchPluginIface::DescriptionInfo desc_1; +// SearchPluginIface::DescriptionInfo desc_2; +// desc_1.key = "描述"; +// desc_1.value = "一个文件"; +// desc_2.key = "路径"; +// desc_2.value = "一个路径/a/b/c/d/e/fffffff/文件12345abcde.txt"; +// desc.append(desc_1); +// desc.append(desc_2); +// QStringList actions; +// actions.append("打开"); +// actions.append("复制路径"); +// test_info.description = desc; +// test_info.actionList = actions; +// SearchPluginIface::ResultInfo test_info_1 = test_info; +// test_info_1.name = "文件1"; +// SearchPluginIface::ResultInfo test_info_2 = test_info; +// test_info_2.name = "文件2"; +// SearchPluginIface::ResultInfo test_info_3 = test_info; +// test_info_3.name = "文件3"; +// SearchPluginIface::ResultInfo test_info_4 = test_info; +// test_info_4.name = 
"文件4"; +// SearchPluginIface::ResultInfo test_info_5 = test_info; +// test_info_5.name = "文件5"; +// SearchPluginIface::ResultInfo test_info_6 = test_info; +// test_info_6.name = "文件6"; +// m_result_queue->append(test_info); +// m_result_queue->append(test_info_1); +// m_result_queue->append(test_info_2); +// m_result_queue->append(test_info_3); +// m_result_queue->append(test_info_4); +// m_result_queue->append(test_info_5); +// m_result_queue->append(test_info_6); +// } /********************测试用数据********************/ } @@ -112,7 +112,7 @@ void SearchResultManager::initConnections() connect(m_get_result_thread, &ReceiveResultThread::gotResultInfo, this, &SearchResultManager::gotResultInfo); } -ReceiveResultThread::ReceiveResultThread(QQueue * result_queue, QObject *parent) +ReceiveResultThread::ReceiveResultThread(DataQueue * result_queue, QObject *parent) { m_result_queue = result_queue; } diff --git a/frontend/model/search-result-manager.h b/frontend/model/search-result-manager.h index cf3efaa..ada728f 100644 --- a/frontend/model/search-result-manager.h +++ b/frontend/model/search-result-manager.h @@ -32,14 +32,14 @@ namespace Zeeker { class ReceiveResultThread : public QThread { Q_OBJECT public: - ReceiveResultThread(QQueue * result_queue, QObject * parent = nullptr); + ReceiveResultThread(DataQueue * result_queue, QObject * parent = nullptr); ~ReceiveResultThread() = default; void stop(); protected: void run() override; private: - QQueue * m_result_queue; + DataQueue * m_result_queue; Q_SIGNALS: void gotResultInfo(const SearchPluginIface::ResultInfo&); @@ -60,7 +60,7 @@ public Q_SLOTS: private: void initConnections(); QString m_plugin_id; - QQueue * m_result_queue; + DataQueue * m_result_queue; ReceiveResultThread * m_get_result_thread = nullptr; Q_SIGNALS: diff --git a/frontend/model/search-result-model.cpp b/frontend/model/search-result-model.cpp index 2ee5ad8..a3928ba 100644 --- a/frontend/model/search-result-model.cpp +++ 
b/frontend/model/search-result-model.cpp @@ -126,14 +126,14 @@ const bool &SearchResultModel::isExpanded() QStringList SearchResultModel::getActions(const QModelIndex &index) { if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0) - return m_item->m_result_info_list.at(index.row()).actionList; +// return m_item->m_result_info_list.at(index.row()).actionList; return QStringList(); } QString SearchResultModel::getKey(const QModelIndex &index) { if (m_item->m_result_info_list.length() > index.row() && index.row() >= 0) - return m_item->m_result_info_list.at(index.row()).key; +// return m_item->m_result_info_list.at(index.row()).key; return NULL; } diff --git a/frontend/res/qt-translations/qt_zh_CN.qm b/frontend/res/qt-translations/qt_zh_CN.qm new file mode 100644 index 0000000..623b8e3 Binary files /dev/null and b/frontend/res/qt-translations/qt_zh_CN.qm differ diff --git a/frontend/view/result-view-delegate.cpp b/frontend/view/result-view-delegate.cpp index 814e53b..908c28a 100644 --- a/frontend/view/result-view-delegate.cpp +++ b/frontend/view/result-view-delegate.cpp @@ -12,6 +12,13 @@ void ResultViewDelegate::setSearchKeyword(const QString ®FindKeyWords) m_regFindKeyWords = regFindKeyWords; } +QSize ResultViewDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const +{ + QSize size = QStyledItemDelegate::sizeHint(option,index); + size.setHeight(size.height() + 10); + return size; +} + void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const { QStyleOptionViewItemV4 optionV4 = option; initStyleOption(&optionV4, index); @@ -30,7 +37,7 @@ void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & ctx.palette.setColor(QPalette::Text, optionV4.palette.color(QPalette::Active, QPalette::HighlightedText)); QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4); - textRect.adjust(0, -5, 0, 0); + textRect.adjust(0, 0, 
0, 0); painter->save(); painter->translate(textRect.topLeft()); painter->setClipRect(textRect.translated(-textRect.topLeft())); diff --git a/frontend/view/result-view-delegate.h b/frontend/view/result-view-delegate.h index 7e885a4..7dcd1af 100644 --- a/frontend/view/result-view-delegate.h +++ b/frontend/view/result-view-delegate.h @@ -35,6 +35,8 @@ public: explicit ResultViewDelegate(QObject *parent = nullptr); ~ResultViewDelegate() = default; void setSearchKeyword(const QString &); +protected: + QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const; private: QString m_regFindKeyWords = 0; void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override; diff --git a/frontend/view/result-view.cpp b/frontend/view/result-view.cpp index ae83c24..0625a3c 100644 --- a/frontend/view/result-view.cpp +++ b/frontend/view/result-view.cpp @@ -155,11 +155,11 @@ void ResultView::onRowDoubleClickedSlot(const QModelIndex &index) SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); try { if (plugin) { - if (!info.actionList.isEmpty()) { - plugin->openAction(info.actionList.at(0), info.key); - } else { - throw -2; - } +// if (!info.actionList.isEmpty()) { +// plugin->openAction(info.actionList.at(0), info.key); +// } else { +// throw -2; +// } } else { throw -1; } @@ -211,7 +211,7 @@ void ResultView::onMenuTriggered(QAction *action) //NEW_TODO 接口调整后需要修改 SearchPluginIface *plugin = SearchPluginManager::getInstance()->getPlugin(m_plugin_id); if (plugin) { - plugin->openAction(action->text(), m_model->getKey(this->currentIndex())); +// plugin->openAction(action->text(), m_model->getKey(this->currentIndex())); } else { qWarning()<<"Get plugin failed!"; } diff --git a/libchinese-segmentation/chinese-segmentation.cpp b/libchinese-segmentation/chinese-segmentation.cpp index 477f5b7..3b6f04c 100644 --- a/libchinese-segmentation/chinese-segmentation.cpp +++ 
b/libchinese-segmentation/chinese-segmentation.cpp @@ -30,12 +30,12 @@ ChineseSegmentation::ChineseSegmentation() { const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8"; const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8"; const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8"; - m_jieba = new cppjieba::Jieba(DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH, - STOP_WORD_PATH); + STOP_WORD_PATH, + ""); } ChineseSegmentation::~ChineseSegmentation() { @@ -58,7 +58,7 @@ QVector ChineseSegmentation::callSegement(std::string s) { // str.squeeze(); const size_t topk = -1; - std::vector keywordres; + std::vector keywordres; ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk); std::string().swap(s); QVector vecNeeds; @@ -66,13 +66,20 @@ QVector ChineseSegmentation::callSegement(std::string s) { keywordres.clear(); // keywordres.shrink_to_fit(); - - return vecNeeds; } -void ChineseSegmentation::convert(std::vector &keywordres, QVector &kw) { +std::vector ChineseSegmentation::callSegementStd(const std::string &str) { + + const size_t topk = -1; + std::vector keywordres; + ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk); + + return keywordres; +} + +void ChineseSegmentation::convert(std::vector &keywordres, QVector &kw) { for(auto i : keywordres) { SKeyWord temp; temp.word = i.word; diff --git a/libchinese-segmentation/chinese-segmentation.h b/libchinese-segmentation/chinese-segmentation.h index 4a12c23..01e8046 100644 --- a/libchinese-segmentation/chinese-segmentation.h +++ b/libchinese-segmentation/chinese-segmentation.h @@ -48,7 +48,10 @@ public: static ChineseSegmentation *getInstance(); ~ChineseSegmentation(); QVector callSegement(std::string s); - void convert(std::vector& keywordres, QVector& kw); + //新添加callSegementStd函数,修改返回值为std::vector并简化内部处理流程--jxx20210517 + //修改函数入参形式为引用,去掉Qstring与std::string转换代码--jxx20210519 + std::vector callSegementStd(const 
std::string& str); + void convert(std::vector& keywordres, QVector& kw); private: static QMutex m_mutex; cppjieba::Jieba *m_jieba; diff --git a/libchinese-segmentation/cppjieba/DatTrie.hpp b/libchinese-segmentation/cppjieba/DatTrie.hpp new file mode 100644 index 0000000..0709a4f --- /dev/null +++ b/libchinese-segmentation/cppjieba/DatTrie.hpp @@ -0,0 +1,511 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "limonp/Md5.hpp" +#include "Unicode.hpp" +#include "darts.h" + +namespace cppjieba { + +using std::pair; + +struct DatElement { + string word; + string tag; + double weight = 0; + + bool operator < (const DatElement & b) const { + if (word == b.word) { + return this->weight > b.weight; + } + + return this->word < b.word; + } +}; + +struct IdfElement { + string word; + double idf = 0; + + bool operator < (const IdfElement & b) const { + if (word == b.word) { + return this->idf > b.idf; + } + + return this->word < b.word; + } +}; + +inline std::ostream & operator << (std::ostream& os, const DatElement & elem) { + return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight; +} + +struct DatMemElem { + double weight = 0.0; + char tag[8] = {}; + + void SetTag(const string & str) { + memset(&tag[0], 0, sizeof(tag)); + strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1)); + } + + string GetTag() const { + return &tag[0]; + } +}; + +inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) { + return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight; +} + +struct DatDag { + limonp::LocalVector > nexts; + double max_weight; + int max_next; +}; + +typedef Darts::DoubleArray JiebaDAT; + + +struct CacheFileHeader { + char md5_hex[32] = {}; + double min_weight = 0; + uint32_t elements_num = 0; + uint32_t dat_size = 0; +}; + +static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid"); +static_assert((sizeof(CacheFileHeader) 
% sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal"); + + +class DatTrie { +public: + DatTrie() {} + ~DatTrie() { + ::munmap(mmap_addr_, mmap_length_); + mmap_addr_ = nullptr; + mmap_length_ = 0; + + ::close(mmap_fd_); + mmap_fd_ = -1; + } + + const DatMemElem * Find(const string & key) const { + JiebaDAT::result_pair_type find_result; + dat_.exactMatchSearch(key.c_str(), find_result); + + if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) { + return nullptr; + } + + return &elements_ptr_[ find_result.value ]; + } + + const double Find(const string & key, std::size_t length, std::size_t node_pos) const { + JiebaDAT::result_pair_type find_result; + dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos); + + if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) { + return -1; + } + + return idf_elements_ptr_[ find_result.value ]; + } + + void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, + vector&res, size_t max_word_len) const { + + res.clear(); + res.resize(end - begin); + + string text_str; + EncodeRunesToString(begin, end, text_str); + + static const size_t max_num = 128; + JiebaDAT::result_pair_type result_pairs[max_num] = {}; + + for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) { + + std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); + + res[i].nexts.push_back(pair(i + 1, nullptr)); + + for (std::size_t idx = 0; idx < num_results; ++idx) { + auto & match = result_pairs[idx]; + + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { + continue; + } + + auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length); + + if (char_num > max_word_len) { + continue; + } + + auto pValue = &elements_ptr_[match.value]; + + if (1 == char_num) { + res[i].nexts[0].second = pValue; + continue; + } + + 
res[i].nexts.push_back(pair(i + char_num, pValue)); + } + + begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune); + } + } + + void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, + vector&res, size_t max_word_len) const { + + res.clear(); + res.resize(end - begin); + + string text_str; + EncodeRunesToString(begin, end, text_str); + + static const size_t max_num = 128; + JiebaDAT::result_pair_type result_pairs[max_num] = {}; + + size_t str_size = end - begin; + for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) { + + begin_pos -= (end - i - 1)->len; + std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); + res[str_size - i - 1].nexts.push_back(pair(str_size - i, nullptr)); + + for (std::size_t idx = 0; idx < num_results; ++idx) { + auto & match = result_pairs[idx]; + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { + continue; + } + + auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length); + + if (char_num > max_word_len) { + continue; + } + + auto pValue = &elements_ptr_[match.value]; + + if (1 == char_num) { + res[str_size - i - 1].nexts[0].second = pValue; + continue; + } + + res[str_size - i - 1].nexts.push_back(pair(str_size - 1 - i + char_num, pValue)); + } + } + } + void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, + vector& words, size_t max_word_len) const { + + string text_str; + EncodeRunesToString(begin, end, text_str); + + static const size_t max_num = 128; + JiebaDAT::result_pair_type result_pairs[max_num] = {};//存放字典查询结果 + size_t str_size = end - begin; + double max_weight[str_size];//存放逆向路径最大weight + for (size_t i = 0; ilen; + + std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); + if (0 == num_results) {//字典不存在则单独分词 + val = min_weight_; + + if (nextPos < str_size) { + val += max_weight[nextPos]; + } + if ((nextPos <= str_size) && (val > 
max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos; + } + } else {//字典存在则根据查询结果数量计算最大概率路径 + for (std::size_t idx = 0; idx < num_results; ++idx) { + auto & match = result_pairs[idx]; + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { + continue; + } + auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length); + if (char_num > max_word_len) { + continue; + } + auto pValue = &elements_ptr_[match.value]; + + val = pValue->weight; + if (1 == char_num) { + if (nextPos < str_size) { + val += max_weight[nextPos]; + } + if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos; + } + } else { + if (nextPos - 1 + char_num < str_size) { + val += max_weight[nextPos - 1 + char_num]; + } + if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos - 1 + char_num; + } + } + } + } + } + for (size_t i = 0; i < str_size;) {//统计动态规划结果 + assert(max_next[i] > i); + assert(max_next[i] <= str_size); + WordRange wr(begin + i, begin + max_next[i] - 1); + words.push_back(wr); + i = max_next[i]; + } + } + double GetMinWeight() const { + return min_weight_; + } + + void SetMinWeight(double d) { + min_weight_ = d ; + } + + bool InitBuildDat(vector& elements, const string & dat_cache_file, const string & md5) { + BuildDatCache(elements, dat_cache_file, md5); + return InitAttachDat(dat_cache_file, md5); + } + + bool InitBuildDat(vector& elements, const string & dat_cache_file, const string & md5) { + BuildDatCache(elements, dat_cache_file, md5); + return InitIdfAttachDat(dat_cache_file, md5); + } + + bool InitAttachDat(const string & dat_cache_file, const string & md5) { + mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY); + + if (mmap_fd_ < 0) { + return false; + } + + const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END); + assert(seek_off >= 0); + mmap_length_ = 
seek_off; + + mmap_addr_ = reinterpret_cast(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0)); + assert(MAP_FAILED != mmap_addr_); + + assert(mmap_length_ >= sizeof(CacheFileHeader)); + CacheFileHeader & header = *reinterpret_cast(mmap_addr_); + elements_num_ = header.elements_num; + min_weight_ = header.min_weight; + assert(sizeof(header.md5_hex) == md5.size()); + + if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) { + return false; + } + + assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem) + header.dat_size * dat_.unit_size()); + elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header)); + const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_; + dat_.set_array(dat_ptr, header.dat_size); + return true; + } + + bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) { + mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY); + + if (mmap_fd_ < 0) { + return false; + } + + const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END); + assert(seek_off >= 0); + mmap_length_ = seek_off; + + mmap_addr_ = reinterpret_cast(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0)); + assert(MAP_FAILED != mmap_addr_); + + assert(mmap_length_ >= sizeof(CacheFileHeader)); + CacheFileHeader & header = *reinterpret_cast(mmap_addr_); + elements_num_ = header.elements_num; + min_weight_ = header.min_weight; + assert(sizeof(header.md5_hex) == md5.size()); + + if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) { + return false; + } + + assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size()); + idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header)); + const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_; + dat_.set_array(dat_ptr, header.dat_size); + return true; + } + +private: + void BuildDatCache(vector& elements, const string & dat_cache_file, const string & md5) { 
+ std::sort(elements.begin(), elements.end()); + + vector keys_ptr_vec; + vector values_vec; + vector mem_elem_vec; + + keys_ptr_vec.reserve(elements.size()); + values_vec.reserve(elements.size()); + mem_elem_vec.reserve(elements.size()); + + CacheFileHeader header; + header.min_weight = min_weight_; + assert(sizeof(header.md5_hex) == md5.size()); + memcpy(&header.md5_hex[0], md5.c_str(), md5.size()); + + for (size_t i = 0; i < elements.size(); ++i) { + keys_ptr_vec.push_back(elements[i].word.data()); + values_vec.push_back(i); + mem_elem_vec.push_back(DatMemElem()); + auto & mem_elem = mem_elem_vec.back(); + mem_elem.weight = elements[i].weight; + mem_elem.SetTag(elements[i].tag); + } + + auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]); + assert(0 == ret); + header.elements_num = mem_elem_vec.size(); + header.dat_size = dat_.size(); + + { + string tmp_filepath = string(dat_cache_file) + "_XXXXXX"; + ::umask(S_IWGRP | S_IWOTH); + //const int fd =::mkstemp(&tmp_filepath[0]); + //原mkstemp用法有误,已修复--jxx20210519 + const int fd =::mkstemp((char *)tmp_filepath.data()); + qDebug() << "mkstemp :" << errno << tmp_filepath.data(); + assert(fd >= 0); + ::fchmod(fd, 0644); + + auto write_bytes = ::write(fd, (const char *)&header, sizeof(header)); + write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size()); + write_bytes += ::write(fd, dat_.array(), dat_.total_size()); + + assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size()); + ::close(fd); + + const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str()); + assert(0 == rename_ret); + } + } + + void BuildDatCache(vector& elements, const string & dat_cache_file, const string & md5) { + std::sort(elements.begin(), elements.end()); + + vector keys_ptr_vec; + vector values_vec; + vector mem_elem_vec; + + keys_ptr_vec.reserve(elements.size()); + 
values_vec.reserve(elements.size()); + mem_elem_vec.reserve(elements.size()); + + CacheFileHeader header; + header.min_weight = min_weight_; + assert(sizeof(header.md5_hex) == md5.size()); + memcpy(&header.md5_hex[0], md5.c_str(), md5.size()); + + for (size_t i = 0; i < elements.size(); ++i) { + keys_ptr_vec.push_back(elements[i].word.data()); + values_vec.push_back(i); + mem_elem_vec.push_back(elements[i].idf); + } + + auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]); + assert(0 == ret); + header.elements_num = mem_elem_vec.size(); + header.dat_size = dat_.size(); + + { + string tmp_filepath = string(dat_cache_file) + "_XXXXXX"; + ::umask(S_IWGRP | S_IWOTH); + //const int fd =::mkstemp(&tmp_filepath[0]); + //原mkstemp用法有误,已修复--jxx20210519 + const int fd =::mkstemp((char *)tmp_filepath.data()); + qDebug() << "mkstemp error:" << errno << tmp_filepath.data(); + assert(fd >= 0); + ::fchmod(fd, 0644); + + auto write_bytes = ::write(fd, (const char *)&header, sizeof(header)); + write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size()); + write_bytes += ::write(fd, dat_.array(), dat_.total_size()); + + assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size()); + ::close(fd); + + const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str()); + assert(0 == rename_ret); + } + } + + DatTrie(const DatTrie &); + DatTrie &operator=(const DatTrie &); + +private: + JiebaDAT dat_; + const DatMemElem * elements_ptr_ = nullptr; + const double * idf_elements_ptr_= nullptr; + size_t elements_num_ = 0; + double min_weight_ = 0; + + int mmap_fd_ = -1; + size_t mmap_length_ = 0; + char * mmap_addr_ = nullptr; +}; + + +inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) { + limonp::MD5 md5; + + const auto files = limonp::Split(files_list, "|;"); + file_size_sum = 0; + + for (auto const & local_path : files) { + const int fd = 
::open(local_path.c_str(), O_RDONLY); + if( fd < 0){ + continue; + } + auto const len = ::lseek(fd, 0, SEEK_END); + if (len > 0) { + void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + assert(MAP_FAILED != addr); + + md5.Update((unsigned char *) addr, len); + file_size_sum += len; + + ::munmap(addr, len); + } + ::close(fd); + } + + md5.Final(); + return string(md5.digestChars); +} + +} diff --git a/libchinese-segmentation/cppjieba/DictTrie.hpp b/libchinese-segmentation/cppjieba/DictTrie.hpp index 736b685..44a6cb9 100644 --- a/libchinese-segmentation/cppjieba/DictTrie.hpp +++ b/libchinese-segmentation/cppjieba/DictTrie.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_DICT_TRIE_HPP -#define CPPJIEBA_DICT_TRIE_HPP +#pragma once #include #include @@ -31,8 +12,8 @@ #include "limonp/StringUtil.hpp" #include "limonp/Logging.hpp" #include "Unicode.hpp" -#include "Trie.hpp" - +#include "DatTrie.hpp" +#include namespace cppjieba { using namespace limonp; @@ -50,58 +31,29 @@ public: WordWeightMax, }; // enum UserWordWeightOption - DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) { - Init(dict_path, user_dict_paths, user_word_weight_opt); + DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "", + UserWordWeightOption user_word_weight_opt = WordWeightMedian) { + Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt); } - ~DictTrie() { - delete trie_; - } + ~DictTrie() {} - bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { - DictUnit node_info; - if(!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) { - return false; - } - active_node_infos_.push_back(node_info); - trie_->InsertNode(node_info.word, &active_node_infos_.back()); - return true; - } - - bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) { - DictUnit node_info; - double weight = freq ? 
log(1.0 * freq / freq_sum_) : user_word_default_weight_ ; - if(!MakeNodeInfo(node_info, word, weight, tag)) { - return false; - } - active_node_infos_.push_back(node_info); - trie_->InsertNode(node_info.word, &active_node_infos_.back()); - return true; - } - - const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { - return trie_->Find(begin, end); + const DatMemElem* Find(const string & word) const { + return dat_.Find(word); } void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, - vector&res, + vector&res, size_t max_word_len = MAX_WORD_LENGTH) const { - trie_->Find(begin, end, res, max_word_len); + dat_.Find(begin, end, res, max_word_len); } - bool Find(const string& word) { - const DictUnit *tmp = NULL; - RuneStrArray runes; - if(!DecodeRunesInString(word, runes)) { - XLOG(ERROR) << "Decode failed."; - } - tmp = Find(runes.begin(), runes.end()); - if(tmp == NULL) { - return false; - } else { - return true; - } + void Find(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector& words, + size_t max_word_len = MAX_WORD_LENGTH) const { + dat_.Find(begin, end, words, max_word_len); } bool IsUserDictSingleChineseWord(const Rune& word) const { @@ -109,182 +61,176 @@ public: } double GetMinWeight() const { - return min_weight_; + return dat_.GetMinWeight(); } - void InserUserDictNode(const string& line) { + size_t GetTotalDictSize() const { + return total_dict_size_; + } + + void InserUserDictNode(const string& line, bool saveNodeInfo = true) { vector buf; - DictUnit node_info; + DatElement node_info; Split(line, buf, " "); - if(buf.size() == 1) { - MakeNodeInfo(node_info, - buf[0], - user_word_default_weight_, - UNKNOWN_TAG); - } else if(buf.size() == 2) { - MakeNodeInfo(node_info, - buf[0], - user_word_default_weight_, - buf[1]); - } else if(buf.size() == 3) { - int freq = atoi(buf[1].c_str()); - assert(freq_sum_ > 0.0); - double weight = log(1.0 * freq / freq_sum_); - 
MakeNodeInfo(node_info, buf[0], weight, buf[2]); + + if (buf.size() == 0) { + return; } - static_node_infos_.push_back(node_info); - if(node_info.word.size() == 1) { - user_dict_single_chinese_word_.insert(node_info.word[0]); + + node_info.word = buf[0]; + node_info.weight = user_word_default_weight_; + node_info.tag = UNKNOWN_TAG; + + if (buf.size() == 2) { + node_info.tag = buf[1]; + } else if (buf.size() == 3) { + if (freq_sum_ > 0.0) { + const int freq = atoi(buf[1].c_str()); + node_info.weight = log(1.0 * freq / freq_sum_); + node_info.tag = buf[2]; + } + } + + if (saveNodeInfo) { + static_node_infos_.push_back(node_info); + } + + if (Utf8CharNum(node_info.word) == 1) { + RuneArray word; + + if (DecodeRunesInString(node_info.word, word)) { + user_dict_single_chinese_word_.insert(word[0]); + } else { + XLOG(ERROR) << "Decode " << node_info.word << " failed."; + } } } - void LoadUserDict(const vector& buf) { - for(size_t i = 0; i < buf.size(); i++) { - InserUserDictNode(buf[i]); - } - } - - void LoadUserDict(const set& buf) { - std::set::const_iterator iter; - for(iter = buf.begin(); iter != buf.end(); iter++) { - InserUserDictNode(*iter); - } - } - - void LoadUserDict(const string& filePaths) { + void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) { vector files = limonp::Split(filePaths, "|;"); - size_t lineno = 0; - for(size_t i = 0; i < files.size(); i++) { + + for (size_t i = 0; i < files.size(); i++) { ifstream ifs(files[i].c_str()); XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; string line; - for(; getline(ifs, line); lineno++) { - if(line.size() == 0) { + for (; getline(ifs, line);) { + if (line.size() == 0) { continue; } - InserUserDictNode(line); + + InserUserDictNode(line, saveNodeInfo); } } } private: - void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { - LoadDict(dict_path); + void Init(const string& dict_path, const string& user_dict_paths, string 
dat_cache_path, + UserWordWeightOption user_word_weight_opt) { + const auto dict_list = dict_path + "|" + user_dict_paths; + size_t file_size_sum = 0; + const string md5 = CalcFileListMD5(dict_list, file_size_sum); + + if (dat_cache_path.empty()) { + //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519 + dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache"; + } + QString path = QString::fromStdString(dat_cache_path); + qDebug() << "#########Dict path:" << path; + if (dat_.InitAttachDat(dat_cache_path, md5)) { + LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_; + total_dict_size_ = file_size_sum; + return; + } + + LoadDefaultDict(dict_path); freq_sum_ = CalcFreqSum(static_node_infos_); CalculateWeight(static_node_infos_, freq_sum_); - SetStaticWordWeights(user_word_weight_opt); + double min_weight = 0; + SetStaticWordWeights(user_word_weight_opt, min_weight); + dat_.SetMinWeight(min_weight); - if(user_dict_paths.size()) { - LoadUserDict(user_dict_paths); - } - Shrink(static_node_infos_); - CreateTrie(static_node_infos_); + LoadUserDict(user_dict_paths); + const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5); + assert(build_ret); + total_dict_size_ = file_size_sum; + vector().swap(static_node_infos_); } - void CreateTrie(const vector& dictUnits) { - assert(dictUnits.size()); - vector words; - vector valuePointers; - for(size_t i = 0 ; i < dictUnits.size(); i ++) { - words.push_back(dictUnits[i].word); - valuePointers.push_back(&dictUnits[i]); - } - - trie_ = new Trie(words, valuePointers); - } - - - - - bool MakeNodeInfo(DictUnit& node_info, - const string& word, - double weight, - const string& tag) { - if(!DecodeRunesInString(word, node_info.word)) { - XLOG(ERROR) << "Decode " << word << " failed."; - return false; - } - node_info.weight = weight; - node_info.tag = tag; - return true; - } - - void LoadDict(const string& filePath) { + void LoadDefaultDict(const string& 
filePath) { ifstream ifs(filePath.c_str()); XCHECK(ifs.is_open()) << "open " << filePath << " failed."; string line; vector buf; - DictUnit node_info; - for(size_t lineno = 0; getline(ifs, line); lineno++) { + for (; getline(ifs, line);) { Split(line, buf, " "); XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line; - MakeNodeInfo(node_info, - buf[0], - atof(buf[1].c_str()), - buf[2]); + DatElement node_info; + node_info.word = buf[0]; + node_info.weight = atof(buf[1].c_str()); + node_info.tag = buf[2]; static_node_infos_.push_back(node_info); } } - static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) { + static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) { return lhs.weight < rhs.weight; } - void SetStaticWordWeights(UserWordWeightOption option) { + void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) { XCHECK(!static_node_infos_.empty()); - vector x = static_node_infos_; + vector x = static_node_infos_; sort(x.begin(), x.end(), WeightCompare); - min_weight_ = x[0].weight; - max_weight_ = x[x.size() - 1].weight; - median_weight_ = x[x.size() / 2].weight; - switch(option) { - case WordWeightMin: - user_word_default_weight_ = min_weight_; - break; - case WordWeightMedian: - user_word_default_weight_ = median_weight_; - break; - default: - user_word_default_weight_ = max_weight_; - break; + if(x.empty()){ + return; + } + min_weight = x[0].weight; + const double max_weight_ = x[x.size() - 1].weight; + const double median_weight_ = x[x.size() / 2].weight; + + switch (option) { + case WordWeightMin: + user_word_default_weight_ = min_weight; + break; + + case WordWeightMedian: + user_word_default_weight_ = median_weight_; + break; + + default: + user_word_default_weight_ = max_weight_; + break; } } - double CalcFreqSum(const vector& node_infos) const { + double CalcFreqSum(const vector& node_infos) const { double sum = 0.0; - for(size_t i = 0; i < node_infos.size(); i++) { + + for (size_t 
i = 0; i < node_infos.size(); i++) { sum += node_infos[i].weight; } + return sum; } - void CalculateWeight(vector& node_infos, double sum) const { - assert(sum > 0.0); - for(size_t i = 0; i < node_infos.size(); i++) { - DictUnit& node_info = node_infos[i]; + void CalculateWeight(vector& node_infos, double sum) const { + for (size_t i = 0; i < node_infos.size(); i++) { + DatElement& node_info = node_infos[i]; assert(node_info.weight > 0.0); node_info.weight = log(double(node_info.weight) / sum); } } - void Shrink(vector& units) const { - vector(units.begin(), units.end()).swap(units); - } - - vector static_node_infos_; - deque active_node_infos_; // must not be vector - Trie * trie_; +private: + vector static_node_infos_; + size_t total_dict_size_ = 0; + DatTrie dat_; double freq_sum_; - double min_weight_; - double max_weight_; - double median_weight_; double user_word_default_weight_; unordered_set user_dict_single_chinese_word_; }; } -#endif diff --git a/libchinese-segmentation/cppjieba/FullSegment.hpp b/libchinese-segmentation/cppjieba/FullSegment.hpp index 1a2bca7..1652b75 100644 --- a/libchinese-segmentation/cppjieba/FullSegment.hpp +++ b/libchinese-segmentation/cppjieba/FullSegment.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_FULLSEGMENT_H -#define CPPJIEBA_FULLSEGMENT_H +#pragma once #include #include @@ -30,82 +11,48 @@ namespace cppjieba { class FullSegment: public SegmentBase { public: - FullSegment(const string& dictPath) { - dictTrie_ = new DictTrie(dictPath); - isNeedDestroy_ = true; - } FullSegment(const DictTrie* dictTrie) - : dictTrie_(dictTrie), isNeedDestroy_(false) { + : dictTrie_(dictTrie) { assert(dictTrie_); } - ~FullSegment() { - if(isNeedDestroy_) { - delete dictTrie_; - } - } - void Cut(const string& sentence, - vector& words) const { - vector tmp; - Cut(sentence, tmp); - GetStringsFromWords(tmp, words); - } - void Cut(const string& sentence, - vector& words) const { - PreFilter pre_filter(symbols_, sentence); - PreFilter::Range range; - vector wrs; - wrs.reserve(sentence.size() / 2); - while(pre_filter.HasNext()) { - range = pre_filter.Next(); - Cut(range.begin, range.end, wrs); - } - words.clear(); - words.reserve(wrs.size()); - GetWordsFromWordRanges(sentence, wrs, words); - } - void Cut(RuneStrArray::const_iterator begin, - RuneStrArray::const_iterator end, - vector& res) const { - // result of searching in trie tree - LocalVector > tRes; + ~FullSegment() { } - // max index of res's words - size_t maxIdx = 0; - - // always equals to (uItr - begin) - size_t uIdx = 0; - - // tmp variables - size_t wordLen = 0; + virtual void Cut(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector& res, bool, size_t) const override { assert(dictTrie_); - vector dags; + vector dags; dictTrie_->Find(begin, end, dags); - for(size_t i = 0; i < dags.size(); i++) { - for(size_t j = 0; j < dags[i].nexts.size(); j++) { - size_t nextoffset = dags[i].nexts[j].first; + size_t max_word_end_pos = 0; + + for (size_t i = 0; i < dags.size(); i++) { + for (const auto & kv : dags[i].nexts) { + const size_t nextoffset = kv.first - 1; assert(nextoffset < dags.size()); - const DictUnit* du = dags[i].nexts[j].second; - if(du == NULL) { - 
if(dags[i].nexts.size() == 1 && maxIdx <= uIdx) { - WordRange wr(begin + i, begin + nextoffset); - res.push_back(wr); - } - } else { - wordLen = du->word.size(); - if(wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) { - WordRange wr(begin + i, begin + nextoffset); - res.push_back(wr); - } + const auto wordLen = nextoffset - i + 1; + const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i)); + const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary + + if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) { + WordRange wr(begin + i, begin + nextoffset); + res.push_back(wr); } - maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx; + + max_word_end_pos = max(max_word_end_pos, nextoffset + 1); } - uIdx++; } } + + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { + + } + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t) const override { + + } private: const DictTrie* dictTrie_; - bool isNeedDestroy_; }; } -#endif diff --git a/libchinese-segmentation/cppjieba/HMMModel.hpp b/libchinese-segmentation/cppjieba/HMMModel.hpp index 5491a0c..6d68bed 100644 --- a/libchinese-segmentation/cppjieba/HMMModel.hpp +++ b/libchinese-segmentation/cppjieba/HMMModel.hpp @@ -1,26 +1,6 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_HMMMODEL_H -#define CPPJIEBA_HMMMODEL_H +#pragma once #include "limonp/StringUtil.hpp" -#include "Trie.hpp" namespace cppjieba { @@ -59,16 +39,18 @@ struct HMMModel { XCHECK(GetLine(ifile, line)); Split(line, tmp, " "); XCHECK(tmp.size() == STATUS_SUM); - for(size_t j = 0; j < tmp.size(); j++) { + + for (size_t j = 0; j < tmp.size(); j++) { startProb[j] = atof(tmp[j].c_str()); } //Load transProb - for(size_t i = 0; i < STATUS_SUM; i++) { + for (size_t i = 0; i < STATUS_SUM; i++) { XCHECK(GetLine(ifile, line)); Split(line, tmp, " "); XCHECK(tmp.size() == STATUS_SUM); - for(size_t j = 0; j < STATUS_SUM; j++) { + + for (size_t j = 0; j < tmp.size(); j++) { transProb[i][j] = atof(tmp[j].c_str()); } } @@ -92,43 +74,55 @@ struct HMMModel { double GetEmitProb(const EmitProbMap* ptMp, Rune key, double defVal)const { EmitProbMap::const_iterator cit = ptMp->find(key); - if(cit == ptMp->end()) { + + if (cit == ptMp->end()) { return defVal; } + return cit->second; } bool GetLine(ifstream& ifile, string& line) { - while(getline(ifile, line)) { + while (getline(ifile, line)) { Trim(line); - if(line.empty()) { + + if (line.empty()) { continue; } - if(StartsWith(line, "#")) { + + if (StartsWith(line, "#")) { continue; } + return true; } + return false; } bool LoadEmitProb(const string& line, EmitProbMap& mp) { - if(line.empty()) { + if (line.empty()) { return false; } + vector tmp, tmp2; - Unicode unicode; + RuneArray unicode; Split(line, tmp, ","); - for(size_t i = 0; i < tmp.size(); i++) { + + for (size_t i = 0; i < tmp.size(); i++) { Split(tmp[i], tmp2, ":"); - if(2 != tmp2.size()) { + + if (2 != tmp2.size()) { XLOG(ERROR) << "emitProb illegal."; return false; } - if(!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) { + + if (!DecodeRunesInString(tmp2[0], unicode) 
|| unicode.size() != 1) { XLOG(ERROR) << "TransCode failed."; return false; } + mp[unicode[0]] = atof(tmp2[1].c_str()); } + return true; } @@ -144,4 +138,3 @@ struct HMMModel { } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/HMMSegment.hpp b/libchinese-segmentation/cppjieba/HMMSegment.hpp index 8168ac3..1a9937b 100644 --- a/libchinese-segmentation/cppjieba/HMMSegment.hpp +++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIBEA_HMMSEGMENT_H -#define CPPJIBEA_HMMSEGMENT_H +#pragma once #include #include @@ -29,58 +10,40 @@ namespace cppjieba { class HMMSegment: public SegmentBase { public: - HMMSegment(const string& filePath) - : model_(new HMMModel(filePath)), isNeedDestroy_(true) { - } HMMSegment(const HMMModel* model) - : model_(model), isNeedDestroy_(false) { - } - ~HMMSegment() { - if(isNeedDestroy_) { - delete model_; - } + : model_(model) { } + ~HMMSegment() { } - void Cut(const string& sentence, - vector& words) const { - vector tmp; - Cut(sentence, tmp); - GetStringsFromWords(tmp, words); - } - void Cut(const string& sentence, - vector& words) const { - PreFilter pre_filter(symbols_, sentence); - PreFilter::Range range; - vector wrs; - wrs.reserve(sentence.size() / 2); - while(pre_filter.HasNext()) { - range = pre_filter.Next(); - Cut(range.begin, range.end, wrs); - } - words.clear(); - words.reserve(wrs.size()); - GetWordsFromWordRanges(sentence, wrs, words); - } - void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res) const { + virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool, + size_t) const override { RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator right = begin; - while(right != end) { - if(right->rune < 0x80) { - if(left != right) { + + while (right != end) { + if (right->rune < 0x80) { //asc码 + if (left != right) { InternalCut(left, right, res); } + left = right; + do { - right = SequentialLetterRule(left, end); - if(right != left) { + right = SequentialLetterRule(left, end);//非英文字符则返回left,否则返回left后非英文字母的位置 + + if (right != left) { break; } - right = NumbersRule(left, end); - if(right != left) { + + right = NumbersRule(left, end);//非数字则返回left,否则返回left后非数字的位置 + + if (right != left) { break; } + right ++; - } while(false); + } while (false); + WordRange wr(left, right - 1); res.push_back(wr); left = right; @@ -88,45 +51,64 @@ 
public: right++; } } - if(left != right) { + + if (left != right) { InternalCut(left, right, res); } } + + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { + + } + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t) const override { + + } private: // sequential letters rule - RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { + RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end) const { Rune x = begin->rune; - if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { + + if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { begin ++; } else { return begin; } - while(begin != end) { + + while (begin != end) { x = begin->rune; - if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) { + + if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) { begin ++; } else { break; } } + return begin; } // RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { Rune x = begin->rune; - if('0' <= x && x <= '9') { + + if ('0' <= x && x <= '9') { begin ++; } else { return begin; } - while(begin != end) { + + while (begin != end) { x = begin->rune; - if(('0' <= x && x <= '9') || x == '.') { + + if (('0' <= x && x <= '9') || x == '.') { begin++; } else { break; } } + return begin; } void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res) const { @@ -135,8 +117,9 @@ private: RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator right; - for(size_t i = 0; i < status.size(); i++) { - if(status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == 
status[i]) + + for (size_t i = 0; i < status.size(); i++) { + if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i]) right = begin + i + 1; WordRange wr(left, right - 1); res.push_back(wr); @@ -155,27 +138,31 @@ private: size_t now, old, stat; double tmp, endE, endS; - vector path(XYSize); - vector weight(XYSize); + //vector path(XYSize); + //vector weight(XYSize); + int path[XYSize]; + double weight[XYSize]; //start - for(size_t y = 0; y < Y; y++) { + for (size_t y = 0; y < Y; y++) { weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE); path[0 + y * X] = -1; } double emitProb; - for(size_t x = 1; x < X; x++) { - for(size_t y = 0; y < Y; y++) { + for (size_t x = 1; x < X; x++) { + for (size_t y = 0; y < Y; y++) { now = x + y * X; weight[now] = MIN_DOUBLE; path[now] = HMMModel::E; // warning emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE); - for(size_t preY = 0; preY < Y; preY++) { + + for (size_t preY = 0; preY < Y; preY++) { old = x - 1 + preY * X; tmp = weight[old] + model_->transProb[preY][y] + emitProb; - if(tmp > weight[now]) { + + if (tmp > weight[now]) { weight[now] = tmp; path[now] = preY; } @@ -186,23 +173,23 @@ private: endE = weight[X - 1 + HMMModel::E * X]; endS = weight[X - 1 + HMMModel::S * X]; stat = 0; - if(endE >= endS) { + + if (endE >= endS) { stat = HMMModel::E; } else { stat = HMMModel::S; } status.resize(X); - for(int x = X - 1 ; x >= 0; x--) { + + for (int x = X - 1 ; x >= 0; x--) { status[x] = stat; stat = path[x + stat * X]; } } const HMMModel* model_; - bool isNeedDestroy_; }; // class HMMSegment } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/IdfTrie.hpp b/libchinese-segmentation/cppjieba/IdfTrie.hpp new file mode 100644 index 0000000..b26decf --- /dev/null +++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include "limonp/StringUtil.hpp" +#include "limonp/Logging.hpp" +#include "Unicode.hpp" +#include "DatTrie.hpp" +#include +namespace cppjieba { + +using namespace limonp; + +const size_t IDF_COLUMN_NUM = 2; + +class IdfTrie { +public: + enum UserWordWeightOption { + WordWeightMin, + WordWeightMedian, + WordWeightMax, + }; // enum UserWordWeightOption + + IdfTrie(const string& dict_path, const string & dat_cache_path = "", + UserWordWeightOption user_word_weight_opt = WordWeightMedian) { + Init(dict_path, dat_cache_path, user_word_weight_opt); + } + + ~IdfTrie() {} + + double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const { + return dat_.Find(word, length, node_pos); + } + + void Find(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector&res, + size_t max_word_len = MAX_WORD_LENGTH) const { + dat_.Find(begin, end, res, max_word_len); + } + + bool IsUserDictSingleChineseWord(const Rune& word) const { + return IsIn(user_dict_single_chinese_word_, word); + } + + double GetMinWeight() const { + return dat_.GetMinWeight(); + } + + size_t GetTotalDictSize() const { + return total_dict_size_; + } + +private: + void Init(const string& dict_path, string dat_cache_path, + UserWordWeightOption user_word_weight_opt) { + size_t file_size_sum = 0; + const string md5 = CalcFileListMD5(dict_path, file_size_sum); + + if (dat_cache_path.empty()) { + //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519 + dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." 
+ to_string(user_word_weight_opt) + ".dat_cache"; + } + QString path = QString::fromStdString(dat_cache_path); + qDebug() << "#########Idf path:" << path; + if (dat_.InitIdfAttachDat(dat_cache_path, md5)) { + total_dict_size_ = file_size_sum; + return; + } + + LoadDefaultIdf(dict_path); + double idf_sum_ = CalcIdfSum(static_node_infos_); + assert(static_node_infos_.size()); + idfAverage_ = idf_sum_ / static_node_infos_.size(); + assert(idfAverage_ > 0.0); + double min_weight = 0; + dat_.SetMinWeight(min_weight); + + const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5); + assert(build_ret); + total_dict_size_ = file_size_sum; + vector().swap(static_node_infos_); + } + + void LoadDefaultIdf(const string& filePath) { + ifstream ifs(filePath.c_str()); + if(not ifs.is_open()){ + return ; + } + XCHECK(ifs.is_open()) << "open " << filePath << " failed."; + string line; + vector buf; + size_t lineno = 0; + + for (; getline(ifs, line); lineno++) { + if (line.empty()) { + XLOG(ERROR) << "lineno: " << lineno << " empty. skipped."; + continue; + } + Split(line, buf, " "); + XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line; + IdfElement node_info; + node_info.word = buf[0]; + node_info.idf = atof(buf[1].c_str()); + static_node_infos_.push_back(node_info); + } + } + + double CalcIdfSum(const vector& node_infos) const { + double sum = 0.0; + + for (size_t i = 0; i < node_infos.size(); i++) { + sum += node_infos[i].idf; + } + + return sum; + } +public: + double idfAverage_; +private: + vector static_node_infos_; + size_t total_dict_size_ = 0; + DatTrie dat_; + unordered_set user_dict_single_chinese_word_; +}; +} + diff --git a/libchinese-segmentation/cppjieba/Jieba.hpp b/libchinese-segmentation/cppjieba/Jieba.hpp index bc93b17..a7b11b3 100644 --- a/libchinese-segmentation/cppjieba/Jieba.hpp +++ b/libchinese-segmentation/cppjieba/Jieba.hpp @@ -1,24 +1,6 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEAB_JIEBA_H -#define CPPJIEAB_JIEBA_H +#pragma once +#include #include "QuerySegment.hpp" #include "KeywordExtractor.hpp" @@ -29,56 +11,48 @@ public: Jieba(const string& dict_path, const string& model_path, const string& user_dict_path, - const string& idfPath, - const string& stopWordPath) - : dict_trie_(dict_path, user_dict_path), + const string& idfPath = "", + const string& stopWordPath = "", + const string& dat_cache_path = "") + : dict_trie_(dict_path, user_dict_path, dat_cache_path), model_(model_path), mp_seg_(&dict_trie_), hmm_seg_(&model_), - mix_seg_(&dict_trie_, &model_), + mix_seg_(&dict_trie_, &model_, stopWordPath), full_seg_(&dict_trie_), - query_seg_(&dict_trie_, &model_), - extractor(&dict_trie_, &model_, idfPath, stopWordPath) { - - } - ~Jieba() { - } - - struct LocWord { - string word; - size_t begin; - size_t end; - }; // struct LocWord + query_seg_(&dict_trie_, &model_, stopWordPath), + extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ } + ~Jieba() { } void Cut(const string& sentence, vector& words, bool hmm = true) const { - mix_seg_.Cut(sentence, words, hmm); + mix_seg_.CutToStr(sentence, words, hmm); } void Cut(const string& sentence, vector& words, bool hmm = true) const { - mix_seg_.Cut(sentence, words, hmm); + mix_seg_.CutToWord(sentence, words, hmm); } void CutAll(const 
string& sentence, vector& words) const { - full_seg_.Cut(sentence, words); + full_seg_.CutToStr(sentence, words); } void CutAll(const string& sentence, vector& words) const { - full_seg_.Cut(sentence, words); + full_seg_.CutToWord(sentence, words); } void CutForSearch(const string& sentence, vector& words, bool hmm = true) const { - query_seg_.Cut(sentence, words, hmm); + query_seg_.CutToStr(sentence, words, hmm); } void CutForSearch(const string& sentence, vector& words, bool hmm = true) const { - query_seg_.Cut(sentence, words, hmm); + query_seg_.CutToWord(sentence, words, hmm); } void CutHMM(const string& sentence, vector& words) const { - hmm_seg_.Cut(sentence, words); + hmm_seg_.CutToStr(sentence, words); } void CutHMM(const string& sentence, vector& words) const { - hmm_seg_.Cut(sentence, words); + hmm_seg_.CutToWord(sentence, words); } void CutSmall(const string& sentence, vector& words, size_t max_word_len) const { - mp_seg_.Cut(sentence, words, max_word_len); + mp_seg_.CutToStr(sentence, words, false, max_word_len); } void CutSmall(const string& sentence, vector& words, size_t max_word_len) const { - mp_seg_.Cut(sentence, words, max_word_len); + mp_seg_.CutToWord(sentence, words, false, max_word_len); } void Tag(const string& sentence, vector >& words) const { @@ -87,16 +61,8 @@ public: string LookupTag(const string &str) const { return mix_seg_.LookupTag(str); } - bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { - return dict_trie_.InsertUserWord(word, tag); - } - - bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) { - return dict_trie_.InsertUserWord(word, freq, tag); - } - bool Find(const string& word) { - return dict_trie_.Find(word); + return nullptr != dict_trie_.Find(word); } void ResetSeparators(const string& s) { @@ -116,18 +82,6 @@ public: return &model_; } - void LoadUserDict(const vector& buf) { - dict_trie_.LoadUserDict(buf); - } - - void LoadUserDict(const set& buf) { - 
dict_trie_.LoadUserDict(buf); - } - - void LoadUserDict(const string& path) { - dict_trie_.LoadUserDict(path); - } - private: DictTrie dict_trie_; HMMModel model_; @@ -145,4 +99,3 @@ public: } // namespace cppjieba -#endif // CPPJIEAB_JIEBA_H diff --git a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp index a8b3600..0011e93 100644 --- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp +++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp @@ -1,27 +1,8 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H -#define CPPJIEBA_KEYWORD_EXTRACTOR_H +#pragma once #include -#include #include "MixSegment.hpp" +#include "IdfTrie.hpp" namespace cppjieba { @@ -31,141 +12,87 @@ using namespace std; /*utf8*/ class KeywordExtractor { public: - struct Word { - string word; - vector offsets; - double weight; - }; // struct Word - KeywordExtractor(const string& dictPath, - const string& hmmFilePath, - const string& idfPath, - const string& stopWordPath, - const string& userDict = "") - : segment_(dictPath, hmmFilePath, userDict) { - LoadIdfDict(idfPath); - LoadStopWordDict(stopWordPath); - } KeywordExtractor(const DictTrie* dictTrie, const HMMModel* model, const string& idfPath, + const string& dat_cache_path, const string& stopWordPath) - : segment_(dictTrie, model) { - LoadIdfDict(idfPath); - LoadStopWordDict(stopWordPath); + : segment_(dictTrie, model, stopWordPath), + idf_trie_(idfPath,dat_cache_path){ } ~KeywordExtractor() { } void Extract(const string& sentence, vector& keywords, size_t topN) const { - vector topWords; + vector topWords; Extract(sentence, topWords, topN); - for(size_t i = 0; i < topWords.size(); i++) { + + for (size_t i = 0; i < topWords.size(); i++) { keywords.push_back(topWords[i].word); } } void Extract(const string& sentence, vector >& keywords, size_t topN) const { - vector topWords; + vector topWords; Extract(sentence, topWords, topN); - for(size_t i = 0; i < topWords.size(); i++) { + + for (size_t i = 0; i < topWords.size(); i++) { keywords.push_back(pair(topWords[i].word, topWords[i].weight)); } } - void Extract(const string& sentence, vector& keywords, size_t topN) const { - vector words; - segment_.Cut(sentence, words); + void Extract(const string& sentence, vector& keywords, size_t topN) const { - map wordmap; - size_t offset = 0; - for(size_t i = 0; i < words.size(); ++i) { - size_t t = offset; - offset += words[i].size(); - if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) 
{ + unordered_map wordmap;//插入字符串与Word的map,相同string统计词频叠加权重 + PreFilter pre_filter(symbols_, sentence); + RuneStrArray::const_iterator null_p; + WordRange range(null_p, null_p); + bool isNull(false); + while (pre_filter.Next(range, isNull)) { + if (isNull) { continue; } - wordmap[words[i]].offsets.push_back(t); - wordmap[words[i]].weight += 1.0; - } - if(offset != sentence.size()) { - XLOG(ERROR) << "words illegal"; - return; + segment_.CutToStr(sentence, range, wordmap); } keywords.clear(); keywords.reserve(wordmap.size()); - for(map::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { - unordered_map::const_iterator cit = idfMap_.find(itr->first); - if(cit != idfMap_.end()) { - itr->second.weight *= cit->second; + + for (unordered_map::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { + double idf = idf_trie_.Find(itr->first); + if (-1 != idf) {//IDF词典查找 + itr->second.weight *= idf; } else { - itr->second.weight *= idfAverage_; + itr->second.weight *= idf_trie_.idfAverage_; } + itr->second.word = itr->first; keywords.push_back(itr->second); } + topN = min(topN, keywords.size()); partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); keywords.resize(topN); } private: - void LoadIdfDict(const string& idfPath) { - ifstream ifs(idfPath.c_str()); - XCHECK(ifs.is_open()) << "open " << idfPath << " failed"; - string line ; - vector buf; - double idf = 0.0; - double idfSum = 0.0; - size_t lineno = 0; - for(; getline(ifs, line); lineno++) { - buf.clear(); - if(line.empty()) { - XLOG(ERROR) << "lineno: " << lineno << " empty. skipped."; - continue; - } - Split(line, buf, " "); - if(buf.size() != 2) { - XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. 
skipped."; - continue; - } - idf = atof(buf[1].c_str()); - idfMap_[buf[0]] = idf; - idfSum += idf; - } - - assert(lineno); - idfAverage_ = idfSum / lineno; - assert(idfAverage_ > 0.0); - } - void LoadStopWordDict(const string& filePath) { - ifstream ifs(filePath.c_str()); - XCHECK(ifs.is_open()) << "open " << filePath << " failed"; - string line ; - while(getline(ifs, line)) { - stopWords_.insert(line); - } - assert(stopWords_.size()); - } - - static bool Compare(const Word& lhs, const Word& rhs) { + static bool Compare(const KeyWord& lhs, const KeyWord& rhs) { return lhs.weight > rhs.weight; } MixSegment segment_; - unordered_map idfMap_; - double idfAverage_; + IdfTrie idf_trie_; - unordered_set stopWords_; + unordered_set symbols_; }; // class KeywordExtractor -inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) { - return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}"; +inline ostream& operator << (ostream& os, const KeyWord& word) { + return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << + "}"; } } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/MPSegment.hpp b/libchinese-segmentation/cppjieba/MPSegment.hpp index 76744a8..0158e4a 100644 --- a/libchinese-segmentation/cppjieba/MPSegment.hpp +++ b/libchinese-segmentation/cppjieba/MPSegment.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_MPSEGMENT_H -#define CPPJIEBA_MPSEGMENT_H +#pragma once #include #include @@ -31,63 +12,36 @@ namespace cppjieba { class MPSegment: public SegmentTagged { public: - MPSegment(const string& dictPath, const string& userDictPath = "") - : dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) { - } MPSegment(const DictTrie* dictTrie) - : dictTrie_(dictTrie), isNeedDestroy_(false) { + : dictTrie_(dictTrie) { assert(dictTrie_); } - ~MPSegment() { - if(isNeedDestroy_) { - delete dictTrie_; - } + ~MPSegment() { } + + virtual void Cut(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector& words, + bool, size_t max_word_len) const override { +// vector dags; +// dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx +// CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx +// CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx + dictTrie_->Find(begin, end, words, max_word_len); } - void Cut(const string& sentence, vector& words) const { - Cut(sentence, words, MAX_WORD_LENGTH); - } + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { - void Cut(const string& sentence, - vector& words, - size_t max_word_len) const { - vector tmp; - Cut(sentence, tmp, max_word_len); - GetStringsFromWords(tmp, words); - } - void Cut(const string& sentence, - vector& words, - size_t max_word_len = MAX_WORD_LENGTH) const { - PreFilter pre_filter(symbols_, sentence); - PreFilter::Range range; - vector wrs; - wrs.reserve(sentence.size() / 2); - while(pre_filter.HasNext()) { - range = pre_filter.Next(); - Cut(range.begin, range.end, wrs, max_word_len); - } - words.clear(); - words.reserve(wrs.size()); - 
GetWordsFromWordRanges(sentence, wrs, words); - } - void Cut(RuneStrArray::const_iterator begin, - RuneStrArray::const_iterator end, - vector& words, - size_t max_word_len = MAX_WORD_LENGTH) const { - vector dags; - dictTrie_->Find(begin, - end, - dags, - max_word_len); - CalcDP(dags); - CutByDag(begin, end, dags, words); } + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t) const override { - const DictTrie* GetDictTrie() const { + } + const DictTrie* GetDictTrie() const override { return dictTrie_; } - bool Tag(const string& src, vector >& res) const { + bool Tag(const string& src, vector >& res) const override { return tagger_.Tag(src, res, *this); } @@ -95,61 +49,81 @@ public: return dictTrie_->IsUserDictSingleChineseWord(value); } private: - void CalcDP(vector& dags) const { - size_t nextPos; - const DictUnit* p; - double val; +/* + void CalcDP(vector& dags) const { + double val(0); + for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) { + rit->max_next = -1; + rit->max_weight = MIN_DOUBLE; - for(vector::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) { - rit->pInfo = NULL; - rit->weight = MIN_DOUBLE; - assert(!rit->nexts.empty()); - for(LocalVector >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) { - nextPos = it->first; - p = it->second; - val = 0.0; - if(nextPos + 1 < dags.size()) { - val += dags[nextPos + 1].weight; + for (const auto & it : rit->nexts) { + const auto nextPos = it.first; + val = dictTrie_->GetMinWeight(); + + if (nullptr != it.second) { + val = it.second->weight; } - if(p) { - val += p->weight; - } else { - val += dictTrie_->GetMinWeight(); + if (nextPos < dags.size()) { + val += dags[nextPos].max_weight; } - if(val > rit->weight) { - rit->pInfo = p; - rit->weight = val; + + if ((nextPos <= dags.size()) && (val > rit->max_weight)) { + rit->max_weight = val; + rit->max_next = nextPos; } } } 
} - void CutByDag(RuneStrArray::const_iterator begin, - RuneStrArray::const_iterator end, - const vector& dags, - vector& words) const { - size_t i = 0; - while(i < dags.size()) { - const DictUnit* p = dags[i].pInfo; - if(p) { - assert(p->word.size() >= 1); - WordRange wr(begin + i, begin + i + p->word.size() - 1); - words.push_back(wr); - i += p->word.size(); - } else { //single chinese word - WordRange wr(begin + i, begin + i); - words.push_back(wr); - i++; +*/ +/* 倒叙方式重写CalcDP函数,初步测试未发现问题*/ + void CalcDP(vector& dags) const { + double val(0); + size_t size = dags.size(); + + for (size_t i = 0; i < size; i++) { + dags[size - 1 - i].max_next = -1; + dags[size - 1 - i].max_weight = MIN_DOUBLE; + + for (const auto & it : dags[size - 1 - i].nexts) { + const auto nextPos = it.first; + val = dictTrie_->GetMinWeight(); + + if (nullptr != it.second) { + val = it.second->weight; + } + + if (nextPos < dags.size()) { + val += dags[nextPos].max_weight; + } + + if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) { + dags[size - 1 - i].max_weight = val; + dags[size - 1 - i].max_next = nextPos; + } } } } + void CutByDag(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator, + const vector& dags, + vector& words) const { + + for (size_t i = 0; i < dags.size();) { + const auto next = dags[i].max_next; + assert(next > i); + assert(next <= dags.size()); + WordRange wr(begin + i, begin + next - 1); + words.push_back(wr); + i = next; + } + } + const DictTrie* dictTrie_; - bool isNeedDestroy_; PosTagger tagger_; }; // class MPSegment } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/MixSegment.hpp b/libchinese-segmentation/cppjieba/MixSegment.hpp index b96309d..9e67069 100644 --- a/libchinese-segmentation/cppjieba/MixSegment.hpp +++ b/libchinese-segmentation/cppjieba/MixSegment.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_MIXSEGMENT_H -#define CPPJIEBA_MIXSEGMENT_H +#pragma once #include #include "MPSegment.hpp" @@ -28,70 +9,52 @@ namespace cppjieba { class MixSegment: public SegmentTagged { public: - MixSegment(const string& mpSegDict, const string& hmmSegDict, - const string& userDict = "") - : mpSeg_(mpSegDict, userDict), - hmmSeg_(hmmSegDict) { - } - MixSegment(const DictTrie* dictTrie, const HMMModel* model) + MixSegment(const DictTrie* dictTrie, + const HMMModel* model, + const string& stopWordPath) : mpSeg_(dictTrie), hmmSeg_(model) { + LoadStopWordDict(stopWordPath); } - ~MixSegment() { - } + ~MixSegment() {} - void Cut(const string& sentence, vector& words) const { - Cut(sentence, words, true); - } - void Cut(const string& sentence, vector& words, bool hmm) const { - vector tmp; - Cut(sentence, tmp, hmm); - GetStringsFromWords(tmp, words); - } - void Cut(const string& sentence, vector& words, bool hmm = true) const { - PreFilter pre_filter(symbols_, sentence); - PreFilter::Range range; - vector wrs; - wrs.reserve(sentence.size() / 2); - while(pre_filter.HasNext()) { - range = pre_filter.Next(); - Cut(range.begin, range.end, wrs, hmm); - } - words.clear(); - words.reserve(wrs.size()); - GetWordsFromWordRanges(sentence, wrs, words); - } - - void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& 
res, bool hmm) const { - if(!hmm) { - mpSeg_.Cut(begin, end, res); + virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { + if (!hmm) { + mpSeg_.CutRuneArray(begin, end, res); return; } + vector words; assert(end >= begin); words.reserve(end - begin); - mpSeg_.Cut(begin, end, words); + mpSeg_.CutRuneArray(begin, end, words); vector hmmRes; hmmRes.reserve(end - begin); - for(size_t i = 0; i < words.size(); i++) { + + for (size_t i = 0; i < words.size(); i++) { //if mp Get a word, it's ok, put it into result - if(words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) { + if (words[i].left != words[i].right || (words[i].left == words[i].right && + mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) { res.push_back(words[i]); continue; } // if mp Get a single one and it is not in userdict, collect it in sequence size_t j = i; - while(j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { + + while (j < words.size() && words[j].left == words[j].right && + !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { j++; } // Cut the sequence with hmm assert(j - 1 >= i); // TODO - hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes); + hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes); + //put hmm result to result - for(size_t k = 0; k < hmmRes.size(); k++) { + for (size_t k = 0; k < hmmRes.size(); k++) { res.push_back(hmmRes[k]); } @@ -103,11 +66,141 @@ public: } } - const DictTrie* GetDictTrie() const { + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { + //目前hmm默认开启,后期如有需要关闭再修改--jxx20210519 +// if (!hmm) { +// mpSeg_.CutRuneArray(begin, end, res); +// return; +// } + + vector words; + assert(end >= begin); + 
words.reserve(end - begin); + mpSeg_.CutRuneArray(begin, end, words); + + vector hmmRes; + hmmRes.reserve(end - begin); + + for (size_t i = 0; i < words.size(); i++) { + //if mp Get a word, it's ok, put it into result + if (words[i].left != words[i].right) { + res.push_back(GetStringFromRunes(s, words[i].left, words[i].right)); + continue; + } + if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune) + || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back + res.push_back(GetStringFromRunes(s, words[i].left, words[i].right)); + continue; + } + + // if mp Get a single one and it is not in userdict, collect it in sequence + size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符 + + while (j < (words.size() - 1) && words[j].left == words[j].right && + !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { + j++; + } + + // Cut the sequence with hmm + assert(j - 1 >= i); + // TODO + hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes); + + //put hmm result to result + for (size_t k = 0; k < hmmRes.size(); k++) { + res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right)); + } + + //clear tmp vars + hmmRes.clear(); + + //let i jump over this piece + i = j - 1; + } + } + + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t) const override { + vector words; + vector hmmRes; + assert(end >= begin); + if (3 == begin->len or 4 == begin->len) { + words.reserve(end - begin); + mpSeg_.CutRuneArray(begin, end, words); + hmmRes.reserve(words.size()); + } else { + hmmRes.reserve(end - begin); + } + + if (words.size() != 0) {//存在中文分词结果 + for (size_t i = 0; i < words.size(); i++) { + + string str = GetStringFromRunes(s, words[i].left, words[i].right); + + if (stopWords_.find(str) != stopWords_.end()) { + continue; + } + + if (words[i].left != words[i].right) { + res[str].offsets.push_back(words[i].left->offset); + res[str].weight += 1.0; 
+ continue; + } + if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune) + || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back + if (stopWords_.find(str) != stopWords_.end()) { + continue; + } + res[str].offsets.push_back(words[i].left->offset); + res[str].weight += 1.0; + continue; + } + + // if mp Get a single one and it is not in userdict, collect it in sequence + size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符 + + while (j < (words.size() - 1) + && words[j].left == words[j].right + && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { + j++; + } + + // Cut the sequence with hmm + assert(j - 1 >= i); + // TODO + hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes); + + //put hmm result to result + for (size_t k = 0; k < hmmRes.size(); k++) { + string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right); + if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) { + continue; + } + res[hmmStr].offsets.push_back(hmmRes[k].left->offset); + res[hmmStr].weight += 1.0; + } + + //clear tmp vars + hmmRes.clear(); + + //let i jump over this piece + i = j - 1; + } + } else {//不存在中文分词结果 + for (size_t i = 0; i < (size_t)(end - begin); i++) { + string str = s.substr((begin+i)->offset, (begin+i)->len); + res[str].offsets.push_back((begin+i)->offset); + res[str].weight += 1.0; + } + } + } + + const DictTrie* GetDictTrie() const override { return mpSeg_.GetDictTrie(); } - bool Tag(const string& src, vector >& res) const { + bool Tag(const string& src, vector >& res) const override { return tagger_.Tag(src, res, *this); } @@ -115,7 +208,23 @@ public: return tagger_.LookupTag(str, *this); } + void LoadStopWordDict(const string& filePath) { + ifstream ifs(filePath.c_str()); + if(not ifs.is_open()){ + return ; + } + XCHECK(ifs.is_open()) << "open " << filePath << " failed"; + string line ; + + while (getline(ifs, line)) { + stopWords_.insert(line); + } + + assert(stopWords_.size()); + } private: + 
unordered_set stopWords_; + MPSegment mpSeg_; HMMSegment hmmSeg_; PosTagger tagger_; @@ -124,4 +233,3 @@ private: } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/PosTagger.hpp b/libchinese-segmentation/cppjieba/PosTagger.hpp index aae39ba..5bbed42 100644 --- a/libchinese-segmentation/cppjieba/PosTagger.hpp +++ b/libchinese-segmentation/cppjieba/PosTagger.hpp @@ -1,27 +1,8 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_POS_TAGGING_H -#define CPPJIEBA_POS_TAGGING_H +#pragma once #include "limonp/StringUtil.hpp" -#include "SegmentTagged.hpp" #include "DictTrie.hpp" +#include "SegmentTagged.hpp" namespace cppjieba { using namespace limonp; @@ -39,28 +20,31 @@ public: bool Tag(const string& src, vector >& res, const SegmentTagged& segment) const { vector CutRes; - segment.Cut(src, CutRes); + segment.CutToStr(src, CutRes); - for(vector::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) { + for (vector::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) { res.push_back(make_pair(*itr, LookupTag(*itr, segment))); } + return !res.empty(); } string LookupTag(const string &str, const SegmentTagged& segment) const { - const DictUnit *tmp = NULL; - RuneStrArray runes; const DictTrie * dict = segment.GetDictTrie(); assert(dict != NULL); - if(!DecodeRunesInString(str, runes)) { - XLOG(ERROR) << "Decode failed."; - return POS_X; - } - tmp = dict->Find(runes.begin(), runes.end()); - if(tmp == NULL || tmp->tag.empty()) { + const auto tmp = dict->Find(str); + + if (tmp == NULL || tmp->GetTag().empty()) { + RuneStrArray runes; + + if (!DecodeRunesInString(str, runes)) { + XLOG(ERROR) << "Decode failed."; + return POS_X; + } + return SpecialRule(runes); } else { - return tmp->tag; + return tmp->GetTag(); } } @@ -68,22 +52,27 @@ private: const char* SpecialRule(const RuneStrArray& unicode) const { size_t m = 0; size_t eng = 0; - for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) { - if(unicode[i].rune < 0x80) { + + for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) { + if (unicode[i].rune < 0x80) { eng ++; - if('0' <= unicode[i].rune && unicode[i].rune <= '9') { + + if ('0' <= unicode[i].rune && unicode[i].rune <= '9') { m++; } } } + // ascii char is not found - if(eng == 0) { + if (eng == 0) { return POS_X; } + // all the ascii is number char - if(m == eng) { + if (m == eng) { return POS_M; } + // the ascii 
chars contain english letter return POS_ENG; } @@ -92,4 +81,3 @@ private: } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/PreFilter.hpp b/libchinese-segmentation/cppjieba/PreFilter.hpp index 848d43c..2dd30dd 100644 --- a/libchinese-segmentation/cppjieba/PreFilter.hpp +++ b/libchinese-segmentation/cppjieba/PreFilter.hpp @@ -1,43 +1,20 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_PRE_FILTER_H -#define CPPJIEBA_PRE_FILTER_H +#pragma once -#include "Trie.hpp" #include "limonp/Logging.hpp" +#include +#include "Unicode.hpp" namespace cppjieba { class PreFilter { public: - //TODO use WordRange instead of Range - struct Range { - RuneStrArray::const_iterator begin; - RuneStrArray::const_iterator end; - }; // struct Range - - PreFilter(const unordered_set& symbols, + PreFilter(const std::unordered_set& symbols, const string& sentence) : symbols_(symbols) { - if(!DecodeRunesInString(sentence, sentence_)) { - XLOG(ERROR) << "decode failed. "; + if (!DecodeRunesInString(sentence, sentence_)) { + XLOG(ERROR) << "decode failed. 
"<rune)) { - if(range.begin == cursor_) { + bool Next(WordRange& wordRange) { + + if (cursor_ == sentence_.end()) { + return false; + } + + wordRange.left = cursor_; + + while (cursor_->rune == 0x20 && cursor_ != sentence_.end()) { + cursor_++; + } + + if (cursor_ == sentence_.end()) { + wordRange.right = cursor_; + return true; + } + + while (++cursor_ != sentence_.end()) { + if (cursor_->rune == 0x20) { + wordRange.right = cursor_; + return true; + } + } + + wordRange.right = sentence_.end(); + return true; + } + + bool Next(WordRange& wordRange, bool& isNull) { + isNull = false; + if (cursor_ == sentence_.end()) { + return false; + } + + wordRange.left = cursor_; + if (cursor_->rune == 0x20) { + while (cursor_ != sentence_.end()) { + if (cursor_->rune != 0x20) { + if (wordRange.left == cursor_) { + cursor_ ++; + } + wordRange.right = cursor_; + isNull = true; + return true; + } + cursor_ ++; + } + } + + int max_num = 0; + uint32_t utf8_num = cursor_->len; + + while (cursor_ != sentence_.end()) { + if (cursor_->rune == 0x20) { + if (wordRange.left == cursor_) { cursor_ ++; } - range.end = cursor_; + + wordRange.right = cursor_; + return true; + } + + cursor_ ++; + max_num++; + if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节,暂定限制为1024个字 + wordRange.right = cursor_; + return true; + } + } + + wordRange.right = sentence_.end(); + return true; + } + + WordRange Next() { + WordRange range(cursor_, cursor_); + + while (cursor_ != sentence_.end()) { + //if (IsIn(symbols_, cursor_->rune)) { + if (cursor_->rune == 0x20) { + if (range.left == cursor_) { + cursor_ ++; + } + + range.right = cursor_; return range; } + cursor_ ++; } - range.end = sentence_.end(); + + range.right = sentence_.end(); return range; } private: RuneStrArray::const_iterator cursor_; RuneStrArray sentence_; - const unordered_set& symbols_; + const std::unordered_set& symbols_; }; // class PreFilter } // namespace cppjieba -#endif // CPPJIEBA_PRE_FILTER_H diff --git 
a/libchinese-segmentation/cppjieba/QuerySegment.hpp b/libchinese-segmentation/cppjieba/QuerySegment.hpp index 08a9a15..9db0b97 100644 --- a/libchinese-segmentation/cppjieba/QuerySegment.hpp +++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_QUERYSEGMENT_H -#define CPPJIEBA_QUERYSEGMENT_H +#pragma once #include #include @@ -28,74 +9,70 @@ #include "FullSegment.hpp" #include "MixSegment.hpp" #include "Unicode.hpp" +#include "DictTrie.hpp" namespace cppjieba { class QuerySegment: public SegmentBase { public: - QuerySegment(const string& dict, const string& model, const string& userDict = "") - : mixSeg_(dict, model, userDict), - trie_(mixSeg_.GetDictTrie()) { - } - QuerySegment(const DictTrie* dictTrie, const HMMModel* model) - : mixSeg_(dictTrie, model), trie_(dictTrie) { + QuerySegment(const DictTrie* dictTrie, + const HMMModel* model, + const string& stopWordPath) + : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) { } ~QuerySegment() { } - void Cut(const string& sentence, vector& words) const { - Cut(sentence, words, true); - } - void Cut(const string& sentence, vector& words, bool hmm) const { - vector tmp; - Cut(sentence, tmp, hmm); - GetStringsFromWords(tmp, words); - } - void Cut(const string& sentence, vector& words, bool hmm = true) 
const { - PreFilter pre_filter(symbols_, sentence); - PreFilter::Range range; - vector wrs; - wrs.reserve(sentence.size() / 2); - while(pre_filter.HasNext()) { - range = pre_filter.Next(); - Cut(range.begin, range.end, wrs, hmm); - } - words.clear(); - words.reserve(wrs.size()); - GetWordsFromWordRanges(sentence, wrs, words); - } - void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm) const { + virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { //use mix Cut first vector mixRes; - mixSeg_.Cut(begin, end, mixRes, hmm); + mixSeg_.CutRuneArray(begin, end, mixRes, hmm); vector fullRes; - for(vector::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { - if(mixResItr->Length() > 2) { - for(size_t i = 0; i + 1 < mixResItr->Length(); i++) { - WordRange wr(mixResItr->left + i, mixResItr->left + i + 1); - if(trie_->Find(wr.left, wr.right + 1) != NULL) { + + for (vector::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { + if (mixResItr->Length() > 2) { + for (size_t i = 0; i + 1 < mixResItr->Length(); i++) { + string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2); + + if (trie_->Find(text) != NULL) { + WordRange wr(mixResItr->left + i, mixResItr->left + i + 1); res.push_back(wr); } } } - if(mixResItr->Length() > 3) { - for(size_t i = 0; i + 2 < mixResItr->Length(); i++) { - WordRange wr(mixResItr->left + i, mixResItr->left + i + 2); - if(trie_->Find(wr.left, wr.right + 1) != NULL) { + + if (mixResItr->Length() > 3) { + for (size_t i = 0; i + 2 < mixResItr->Length(); i++) { + string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3); + + if (trie_->Find(text) != NULL) { + WordRange wr(mixResItr->left + i, mixResItr->left + i + 2); res.push_back(wr); } } } + res.push_back(*mixResItr); } } + + virtual void CutWithSentence(const 
string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t) const override { + + } + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t) const override { + + } private: - bool IsAllAscii(const Unicode& s) const { - for(size_t i = 0; i < s.size(); i++) { - if(s[i] >= 0x80) { + bool IsAllAscii(const RuneArray& s) const { + for (size_t i = 0; i < s.size(); i++) { + if (s[i] >= 0x80) { return false; } } + return true; } MixSegment mixSeg_; @@ -104,4 +81,3 @@ private: } // namespace cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/SegmentBase.hpp b/libchinese-segmentation/cppjieba/SegmentBase.hpp index ce06baf..942e0bd 100644 --- a/libchinese-segmentation/cppjieba/SegmentBase.hpp +++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_SEGMENTBASE_H -#define CPPJIEBA_SEGMENTBASE_H +#pragma once #include "limonp/Logging.hpp" #include "PreFilter.hpp" @@ -35,24 +16,74 @@ public: SegmentBase() { XCHECK(ResetSeparators(SPECIAL_SEPARATORS)); } - virtual ~SegmentBase() { + virtual ~SegmentBase() { } + + virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t max_word_len) const = 0; + //添加基于sentence的cut方法,减少中间变量的存储与格式转换--jxx20210517 + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, + size_t max_word_len) const = 0; + virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, + size_t max_word_len) const = 0; + //重写CutToStr函数,简化获取vector& words的流程,降低内存占用--jxx20210517 + void CutToStr(const string& sentence, vector& words, bool hmm = true, + size_t max_word_len = MAX_WORD_LENGTH) const { + PreFilter pre_filter(symbols_, sentence); + words.clear(); + words.reserve(sentence.size() / 2);//todo 参考源码,参数待定 + RuneStrArray::const_iterator null_p; + WordRange range(null_p, null_p); + while (pre_filter.Next(range)) { + CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len); + } + } + void CutToStr(const string& sentence, WordRange range, vector& words, bool hmm = true, + size_t max_word_len = MAX_WORD_LENGTH) const { + CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len); + } + void CutToStr(const string& sentence, WordRange range, unordered_map& words, bool hmm = true, + size_t max_word_len = MAX_WORD_LENGTH) const { + CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len); + } + void CutToWord(const string& sentence, vector& words, bool hmm = true, + size_t max_word_len = MAX_WORD_LENGTH) const { + PreFilter pre_filter(symbols_, sentence); + vector wrs; + 
wrs.reserve(sentence.size() / 2); + + while (pre_filter.HasNext()) { + auto range = pre_filter.Next(); + Cut(range.left, range.right, wrs, hmm, max_word_len); + } + + words.clear(); + words.reserve(wrs.size()); + GetWordsFromWordRanges(sentence, wrs, words); + wrs.clear(); + vector().swap(wrs); } - virtual void Cut(const string& sentence, vector& words) const = 0; + void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, + bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const { + Cut(begin, end, res, hmm, max_word_len); + } bool ResetSeparators(const string& s) { symbols_.clear(); RuneStrArray runes; - if(!DecodeRunesInString(s, runes)) { + + if (!DecodeRunesInString(s, runes)) { XLOG(ERROR) << "decode " << s << " failed"; return false; } - for(size_t i = 0; i < runes.size(); i++) { - if(!symbols_.insert(runes[i].rune).second) { + + for (size_t i = 0; i < runes.size(); i++) { + if (!symbols_.insert(runes[i].rune).second) { XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists"; return false; } } + return true; } protected: @@ -61,4 +92,3 @@ protected: } // cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/SegmentTagged.hpp b/libchinese-segmentation/cppjieba/SegmentTagged.hpp index 0ddc259..be68c3a 100644 --- a/libchinese-segmentation/cppjieba/SegmentTagged.hpp +++ b/libchinese-segmentation/cppjieba/SegmentTagged.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_SEGMENTTAGGED_H -#define CPPJIEBA_SEGMENTTAGGED_H +#pragma once #include "SegmentBase.hpp" @@ -38,4 +19,3 @@ public: } // cppjieba -#endif diff --git a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp index cb89a02..1422c6f 100644 --- a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp +++ b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp @@ -1,212 +1,205 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H -#define CPPJIEBA_TEXTRANK_EXTRACTOR_H - -#include -#include "Jieba.hpp" - -namespace cppjieba { -using namespace limonp; -using namespace std; - -class TextRankExtractor { -public: - typedef struct _Word { - string word; - vector offsets; - double weight; - } Word; // struct Word -private: - typedef std::map WordMap; - - class WordGraph { - private: - typedef double Score; - typedef string Node; - typedef std::set NodeSet; - - typedef std::map Edges; - typedef std::map Graph; - //typedef std::unordered_map Edges; - //typedef std::unordered_map Graph; - - double d; - Graph graph; - NodeSet nodeSet; - public: - WordGraph(): d(0.85) {}; - WordGraph(double in_d): d(in_d) {}; - - void addEdge(Node start, Node end, double weight) { - Edges temp; - Edges::iterator gotEdges; - nodeSet.insert(start); - nodeSet.insert(end); - graph[start][end] += weight; - graph[end][start] += weight; - } - - void rank(WordMap &ws, size_t rankTime = 10) { - WordMap outSum; - Score wsdef, min_rank, max_rank; - - if(graph.size() == 0) - return; - - wsdef = 1.0 / graph.size(); - - for(Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) { - // edges->first start节点;edge->first end节点;edge->second 权重 - ws[edges->first].word = edges->first; - ws[edges->first].weight = wsdef; - outSum[edges->first].weight = 0; - for(Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) { - outSum[edges->first].weight += edge->second; - } - } - //sort(nodeSet.begin(),nodeSet.end()); 是否需要排序? 
- for(size_t i = 0; i < rankTime; i++) { - for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) { - double s = 0; - for(Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++) - // edge->first end节点;edge->second 权重 - s += edge->second / outSum[edge->first].weight * ws[edge->first].weight; - ws[*node].weight = (1 - d) + d * s; - } - } - - min_rank = max_rank = ws.begin()->second.weight; - for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { - if(i->second.weight < min_rank) { - min_rank = i->second.weight; - } - if(i->second.weight > max_rank) { - max_rank = i->second.weight; - } - } - for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { - ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0); - } - } - }; - -public: - TextRankExtractor(const string& dictPath, - const string& hmmFilePath, - const string& stopWordPath, - const string& userDict = "") - : segment_(dictPath, hmmFilePath, userDict) { - LoadStopWordDict(stopWordPath); - } - TextRankExtractor(const DictTrie* dictTrie, - const HMMModel* model, - const string& stopWordPath) - : segment_(dictTrie, model) { - LoadStopWordDict(stopWordPath); - } - TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) { - LoadStopWordDict(stopWordPath); - } - ~TextRankExtractor() { - } - - void Extract(const string& sentence, vector& keywords, size_t topN) const { - vector topWords; - Extract(sentence, topWords, topN); - for(size_t i = 0; i < topWords.size(); i++) { - keywords.push_back(topWords[i].word); - } - } - - void Extract(const string& sentence, vector >& keywords, size_t topN) const { - vector topWords; - Extract(sentence, topWords, topN); - for(size_t i = 0; i < topWords.size(); i++) { - keywords.push_back(pair(topWords[i].word, topWords[i].weight)); - } - } - - void Extract(const string& sentence, vector& keywords, size_t topN, size_t span = 5, size_t 
rankTime = 10) const { - vector words; - segment_.Cut(sentence, words); - - TextRankExtractor::WordGraph graph; - WordMap wordmap; - size_t offset = 0; - - for(size_t i = 0; i < words.size(); i++) { - size_t t = offset; - offset += words[i].size(); - if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) { - continue; - } - for(size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) { - if(IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) { - skip++; - continue; - } - graph.addEdge(words[i], words[j], 1); - } - wordmap[words[i]].offsets.push_back(t); - } - if(offset != sentence.size()) { - XLOG(ERROR) << "words illegal"; - return; - } - - graph.rank(wordmap, rankTime); - - keywords.clear(); - keywords.reserve(wordmap.size()); - for(WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { - keywords.push_back(itr->second); - } - - topN = min(topN, keywords.size()); - partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); - keywords.resize(topN); - } -private: - void LoadStopWordDict(const string& filePath) { - ifstream ifs(filePath.c_str()); - XCHECK(ifs.is_open()) << "open " << filePath << " failed"; - string line ; - while(getline(ifs, line)) { - stopWords_.insert(line); - } - assert(stopWords_.size()); - } - - static bool Compare(const Word &x, const Word &y) { - return x.weight > y.weight; - } - - MixSegment segment_; - unordered_set stopWords_; -}; // class TextRankExtractor - -inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) { - return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}"; -} -} // namespace cppjieba - -#endif - - + +#include +#include "Jieba.hpp" + +namespace cppjieba { +using namespace limonp; +using namespace std; + +class TextRankExtractor { +public: + typedef struct _Word { + string word; + vector offsets; + double weight; + } Word; // struct Word 
+private: + typedef std::map WordMap; + + class WordGraph { + private: + typedef double Score; + typedef string Node; + typedef std::set NodeSet; + + typedef std::map Edges; + typedef std::map Graph; + //typedef std::unordered_map Edges; + //typedef std::unordered_map Graph; + + double d; + Graph graph; + NodeSet nodeSet; + public: + WordGraph(): d(0.85) {}; + WordGraph(double in_d): d(in_d) {}; + + void addEdge(Node start, Node end, double weight) { + Edges temp; + Edges::iterator gotEdges; + nodeSet.insert(start); + nodeSet.insert(end); + graph[start][end] += weight; + graph[end][start] += weight; + } + + void rank(WordMap &ws, size_t rankTime = 10) { + WordMap outSum; + Score wsdef, min_rank, max_rank; + + if (graph.size() == 0) { + return; + } + + wsdef = 1.0 / graph.size(); + + for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) { + // edges->first start节点;edge->first end节点;edge->second 权重 + ws[edges->first].word = edges->first; + ws[edges->first].weight = wsdef; + outSum[edges->first].weight = 0; + + for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) { + outSum[edges->first].weight += edge->second; + } + } + + //sort(nodeSet.begin(),nodeSet.end()); 是否需要排序? 
+ for (size_t i = 0; i < rankTime; i++) { + for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) { + double s = 0; + + for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++) + // edge->first end节点;edge->second 权重 + { + s += edge->second / outSum[edge->first].weight * ws[edge->first].weight; + } + + ws[*node].weight = (1 - d) + d * s; + } + } + + min_rank = max_rank = ws.begin()->second.weight; + + for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { + if (i->second.weight < min_rank) { + min_rank = i->second.weight; + } + + if (i->second.weight > max_rank) { + max_rank = i->second.weight; + } + } + + for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) { + ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0); + } + } + }; + +public: + TextRankExtractor(const DictTrie* dictTrie, + const HMMModel* model, + const string& stopWordPath) + : segment_(dictTrie, model) { + LoadStopWordDict(stopWordPath); + } + TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) { + LoadStopWordDict(stopWordPath); + } + ~TextRankExtractor() { + } + + void Extract(const string& sentence, vector& keywords, size_t topN) const { + vector topWords; + Extract(sentence, topWords, topN); + + for (size_t i = 0; i < topWords.size(); i++) { + keywords.push_back(topWords[i].word); + } + } + + void Extract(const string& sentence, vector >& keywords, size_t topN) const { + vector topWords; + Extract(sentence, topWords, topN); + + for (size_t i = 0; i < topWords.size(); i++) { + keywords.push_back(pair(topWords[i].word, topWords[i].weight)); + } + } + + void Extract(const string& sentence, vector& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const { + vector words; + segment_.CutToStr(sentence, words); + + TextRankExtractor::WordGraph graph; + WordMap wordmap; + size_t offset = 0; + + for (size_t i = 0; i < 
words.size(); i++) { + size_t t = offset; + offset += words[i].size(); + + if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) { + continue; + } + + for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) { + if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) { + skip++; + continue; + } + + graph.addEdge(words[i], words[j], 1); + } + + wordmap[words[i]].offsets.push_back(t); + } + + if (offset != sentence.size()) { + XLOG(ERROR) << "words illegal"; + return; + } + + graph.rank(wordmap, rankTime); + + keywords.clear(); + keywords.reserve(wordmap.size()); + + for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { + keywords.push_back(itr->second); + } + + topN = min(topN, keywords.size()); + partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare); + keywords.resize(topN); + } +private: + void LoadStopWordDict(const string& filePath) { + ifstream ifs(filePath.c_str()); + XCHECK(ifs.is_open()) << "open " << filePath << " failed"; + string line ; + + while (getline(ifs, line)) { + stopWords_.insert(line); + } + + assert(stopWords_.size()); + } + + static bool Compare(const Word &x, const Word &y) { + return x.weight > y.weight; + } + + MixSegment segment_; + unordered_set stopWords_; +}; // class TextRankExtractor + +inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) { + return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << + "}"; +} +} // namespace cppjieba + + + diff --git a/libchinese-segmentation/cppjieba/Trie.hpp b/libchinese-segmentation/cppjieba/Trie.hpp deleted file mode 100644 index 3f1fc69..0000000 --- a/libchinese-segmentation/cppjieba/Trie.hpp +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ -#ifndef CPPJIEBA_TRIE_HPP -#define CPPJIEBA_TRIE_HPP - -#include -#include -#include "limonp/StdExtension.hpp" -#include "Unicode.hpp" - -namespace cppjieba { - -using namespace std; - -const size_t MAX_WORD_LENGTH = 512; - -struct DictUnit { - Unicode word; - double weight; - string tag; -}; // struct DictUnit - -// for debugging -// inline ostream & operator << (ostream& os, const DictUnit& unit) { -// string s; -// s << unit.word; -// return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); -// } - -struct Dag { - RuneStr runestr; - // [offset, nexts.first] - limonp::LocalVector > nexts; - const DictUnit * pInfo; - double weight; - size_t nextPos; // TODO - Dag(): runestr(), pInfo(NULL), weight(0.0), nextPos(0) { - } -}; // struct Dag - -typedef Rune TrieKey; - -class TrieNode { -public : - TrieNode(): next(NULL), ptValue(NULL) { - } -public: - typedef unordered_map NextMap; - NextMap *next; - const DictUnit *ptValue; -}; - -class Trie { -public: - Trie(const vector& keys, const vector& valuePointers) - : root_(new TrieNode) { - CreateTrie(keys, valuePointers); - } - ~Trie() { - DeleteNode(root_); - } - - const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { - if(begin == end) { - return NULL; - } - - const TrieNode* ptNode = root_; - 
TrieNode::NextMap::const_iterator citer; - for(RuneStrArray::const_iterator it = begin; it != end; it++) { - if(NULL == ptNode->next) { - return NULL; - } - citer = ptNode->next->find(it->rune); - if(ptNode->next->end() == citer) { - return NULL; - } - ptNode = citer->second; - } - return ptNode->ptValue; - } - - void Find(RuneStrArray::const_iterator begin, - RuneStrArray::const_iterator end, - vector&res, - size_t max_word_len = MAX_WORD_LENGTH) const { - assert(root_ != NULL); - res.resize(end - begin); - - const TrieNode *ptNode = NULL; - TrieNode::NextMap::const_iterator citer; - for(size_t i = 0; i < size_t(end - begin); i++) { - res[i].runestr = *(begin + i); - - if(root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) { - ptNode = citer->second; - } else { - ptNode = NULL; - } - if(ptNode != NULL) { - res[i].nexts.push_back(pair(i, ptNode->ptValue)); - } else { - res[i].nexts.push_back(pair(i, static_cast(NULL))); - } - - for(size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) { - if(ptNode == NULL || ptNode->next == NULL) { - break; - } - citer = ptNode->next->find((begin + j)->rune); - if(ptNode->next->end() == citer) { - break; - } - ptNode = citer->second; - if(NULL != ptNode->ptValue) { - res[i].nexts.push_back(pair(j, ptNode->ptValue)); - } - } - } - } - - void InsertNode(const Unicode& key, const DictUnit* ptValue) { - if(key.begin() == key.end()) { - return; - } - - TrieNode::NextMap::const_iterator kmIter; - TrieNode *ptNode = root_; - for(Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) { - if(NULL == ptNode->next) { - ptNode->next = new TrieNode::NextMap; - } - kmIter = ptNode->next->find(*citer); - if(ptNode->next->end() == kmIter) { - TrieNode *nextNode = new TrieNode; - - ptNode->next->insert(make_pair(*citer, nextNode)); - ptNode = nextNode; - } else { - ptNode = kmIter->second; - } - } - assert(ptNode != NULL); - ptNode->ptValue = ptValue; - } - 
-private: - void CreateTrie(const vector& keys, const vector& valuePointers) { - if(valuePointers.empty() || keys.empty()) { - return; - } - assert(keys.size() == valuePointers.size()); - - for(size_t i = 0; i < keys.size(); i++) { - InsertNode(keys[i], valuePointers[i]); - } - } - - void DeleteNode(TrieNode* node) { - if(NULL == node) { - return; - } - if(NULL != node->next) { - for(TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) { - DeleteNode(it->second); - } - delete node->next; - } - delete node; - } - - TrieNode* root_; -}; // class Trie -} // namespace cppjieba - -#endif // CPPJIEBA_TRIE_HPP diff --git a/libchinese-segmentation/cppjieba/Unicode.hpp b/libchinese-segmentation/cppjieba/Unicode.hpp index 64408a2..360b461 100644 --- a/libchinese-segmentation/cppjieba/Unicode.hpp +++ b/libchinese-segmentation/cppjieba/Unicode.hpp @@ -1,23 +1,4 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ -#ifndef CPPJIEBA_UNICODE_H -#define CPPJIEBA_UNICODE_H +#pragma once #include #include @@ -25,6 +6,7 @@ #include #include #include "limonp/LocalVector.hpp" +#include "limonp/StringUtil.hpp" namespace cppjieba { @@ -33,6 +15,12 @@ using std::vector; typedef uint32_t Rune; +struct KeyWord { + string word; + vector offsets; + double weight; +}; // struct Word + struct Word { string word; uint32_t offset; @@ -50,28 +38,28 @@ inline std::ostream& operator << (std::ostream& os, const Word& w) { return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}"; } -struct RuneStr { +struct RuneInfo { Rune rune; uint32_t offset; uint32_t len; - uint32_t unicode_offset; - uint32_t unicode_length; - RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) { + uint32_t unicode_offset = 0; + uint32_t unicode_length = 0; + RuneInfo(): rune(0), offset(0), len(0) { } - RuneStr(Rune r, uint32_t o, uint32_t l) - : rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) { + RuneInfo(Rune r, uint32_t o, uint32_t l) + : rune(r), offset(o), len(l) { } - RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length) + RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length) : rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) { } -}; // struct RuneStr +}; // struct RuneInfo -inline std::ostream& operator << (std::ostream& os, const RuneStr& r) { +inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) { return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}"; } -typedef limonp::LocalVector Unicode; -typedef limonp::LocalVector RuneStrArray; +typedef limonp::LocalVector RuneArray; +typedef limonp::LocalVector RuneStrArray; // [left, right] struct WordRange { @@ -81,129 +69,157 @@ struct WordRange { : left(l), right(r) { } size_t Length() const { - return right - left + 
1; + return right - left; } + bool IsAllAscii() const { - for(RuneStrArray::const_iterator iter = left; iter <= right; ++iter) { - if(iter->rune >= 0x80) { + for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) { + if (iter->rune >= 0x80) { return false; } } + return true; } }; // struct WordRange -struct RuneStrLite { - uint32_t rune; - uint32_t len; - RuneStrLite(): rune(0), len(0) { - } - RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) { - } -}; // struct RuneStrLite -inline RuneStrLite DecodeRuneInString(const char* str, size_t len) { - RuneStrLite rp(0, 0); - if(str == NULL || len == 0) { - return rp; - } - if(!(str[0] & 0x80)) { // 0xxxxxxx +inline bool DecodeRunesInString(const string& s, RuneArray& arr) { + arr.clear(); + return limonp::Utf8ToUnicode32(s, arr); +} + +inline RuneArray DecodeRunesInString(const string& s) { + RuneArray result; + DecodeRunesInString(s, result); + return result; +} + +//重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518 +inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) { + + uint32_t tmp; + uint32_t offset = 0; + runes.clear(); + uint32_t len(0); + for (size_t i = 0; i < s.size();) { + if (!(s.data()[i] & 0x80)) { // 0xxxxxxx // 7bit, total 7bit - rp.rune = (uint8_t)(str[0]) & 0x7f; - rp.len = 1; - } else if((uint8_t)str[0] <= 0xdf && 1 < len) { - // 110xxxxxx + tmp = (uint8_t)(s.data()[i]) & 0x7f; + i++; + len = 1; + } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx // 5bit, total 5bit - rp.rune = (uint8_t)(str[0]) & 0x1f; + tmp = (uint8_t)(s.data()[i]) & 0x1f; // 6bit, total 11bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[1]) & 0x3f; - rp.len = 2; - } else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+1]) & 0x3f; + i += 2; + len = 2; + } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx // 4bit, total 4bit - rp.rune = (uint8_t)(str[0]) & 0x0f; + tmp = (uint8_t)(s.data()[i]) 
& 0x0f; // 6bit, total 10bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[1]) & 0x3f; + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+1]) & 0x3f; // 6bit, total 16bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[2]) & 0x3f; + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+2]) & 0x3f; - rp.len = 3; - } else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx + i += 3; + len = 3; + } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx // 3bit, total 3bit - rp.rune = (uint8_t)(str[0]) & 0x07; + tmp = (uint8_t)(s.data()[i]) & 0x07; // 6bit, total 9bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[1]) & 0x3f; + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+1]) & 0x3f; // 6bit, total 15bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[2]) & 0x3f; + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+2]) & 0x3f; // 6bit, total 21bit - rp.rune <<= 6; - rp.rune |= (uint8_t)(str[3]) & 0x3f; + tmp <<= 6; + tmp |= (uint8_t)(s.data()[i+3]) & 0x3f; - rp.len = 4; - } else { - rp.rune = 0; - rp.len = 0; - } - return rp; -} - -inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) { - runes.clear(); - runes.reserve(len / 2); - for(uint32_t i = 0, j = 0; i < len;) { - RuneStrLite rp = DecodeRuneInString(s + i, len - i); - if(rp.len == 0) { - runes.clear(); - return false; - } - RuneStr x(rp.rune, i, rp.len, j, 1); - runes.push_back(x); - i += rp.len; - ++j; - } - return true; -} - -inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) { - return DecodeRunesInString(s.c_str(), s.size(), runes); -} - -inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) { - unicode.clear(); - RuneStrArray runes; - if(!DecodeRunesInString(s, len, runes)) { + i += 4; + len = 4; + } else { return false; - } - unicode.reserve(runes.size()); - for(size_t i = 0; i < runes.size(); i++) { - unicode.push_back(runes[i].rune); + } + RuneInfo x(tmp, offset, len, i, 1); + runes.push_back(x); + offset += len; } return true; } +class RunePtrWrapper { +public: + 
const RuneInfo * m_ptr = nullptr; + +public: + explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {} + + uint32_t operator *() { + return m_ptr->rune; + } + + RunePtrWrapper operator ++(int) { + m_ptr ++; + return RunePtrWrapper(m_ptr); + } + + bool operator !=(const RunePtrWrapper & b) const { + return this->m_ptr != b.m_ptr; + } +}; + +inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) { + string str; + RunePtrWrapper it_begin(begin), it_end(end); + limonp::Unicode32ToUtf8(it_begin, it_end, str); + return str; +} + +inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) { + RunePtrWrapper it_begin(begin), it_end(end); + limonp::Unicode32ToUtf8(it_begin, it_end, str); + return; +} + +class Unicode32Counter { +public : + size_t length = 0; + void clear() { + length = 0; + } + void push_back(uint32_t) { + ++length; + } +}; + +inline size_t Utf8CharNum(const char * str, size_t length) { + Unicode32Counter c; + + if (limonp::Utf8ToUnicode32(str, length, c)) { + return c.length; + } + + return 0; +} + +inline size_t Utf8CharNum(const string & str) { + return Utf8CharNum(str.data(), str.size()); +} + inline bool IsSingleWord(const string& str) { - RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size()); - return rp.len == str.size(); -} - -inline bool DecodeRunesInString(const string& s, Unicode& unicode) { - return DecodeRunesInString(s.c_str(), s.size(), unicode); -} - -inline Unicode DecodeRunesInString(const string& s) { - Unicode result; - DecodeRunesInString(s, result); - return result; + return Utf8CharNum(str) == 1; } @@ -217,29 +233,31 @@ inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) { assert(right->offset >= left->offset); - uint32_t len = right->offset - left->offset + right->len; - 
return s.substr(left->offset, len); + //uint32_t len = right->offset - left->offset + right->len; + return s.substr(left->offset, right->offset - left->offset + right->len); } inline void GetWordsFromWordRanges(const string& s, const vector& wrs, vector& words) { - for(size_t i = 0; i < wrs.size(); i++) { + for (size_t i = 0; i < wrs.size(); i++) { words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right)); } } -inline vector GetWordsFromWordRanges(const string& s, const vector& wrs) { - vector result; - GetWordsFromWordRanges(s, wrs, result); - return result; +inline void GetWordsFromWordRanges(const string& s, const vector& wrs, vector& words) { + for (size_t i = 0; i < wrs.size(); i++) { + words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right)); + } } inline void GetStringsFromWords(const vector& words, vector& strs) { strs.resize(words.size()); - for(size_t i = 0; i < words.size(); ++i) { + + for (size_t i = 0; i < words.size(); ++i) { strs[i] = words[i].word; } } +const size_t MAX_WORD_LENGTH = 512; + } // namespace cppjieba -#endif // CPPJIEBA_UNICODE_H diff --git a/libchinese-segmentation/cppjieba/cppjieba.pri b/libchinese-segmentation/cppjieba/cppjieba.pri index fd783c4..cec0ba9 100644 --- a/libchinese-segmentation/cppjieba/cppjieba.pri +++ b/libchinese-segmentation/cppjieba/cppjieba.pri @@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD HEADERS += \ $$PWD/DictTrie.hpp \ + $$PWD/IdfTrie.hpp \ $$PWD/FullSegment.hpp \ $$PWD/HMMModel.hpp \ $$PWD/HMMSegment.hpp \ @@ -17,5 +18,4 @@ HEADERS += \ $$PWD/TextRankExtractor.hpp \ $$PWD/Trie.hpp \ $$PWD/Unicode.hpp - include(limonp/limonp.pri) diff --git a/libchinese-segmentation/cppjieba/darts.h b/libchinese-segmentation/cppjieba/darts.h new file mode 100644 index 0000000..88b5048 --- /dev/null +++ b/libchinese-segmentation/cppjieba/darts.h @@ -0,0 +1,1931 @@ +#ifndef DARTS_H_ +#define DARTS_H_ + +#include +#include +#include + +#define DARTS_VERSION "0.32" + +// DARTS_THROW() throws a whose message starts with the 
+// file name and the line number. For example, DARTS_THROW("error message") at +// line 123 of "darts.h" throws a `Darts::Details::Exception' which has a pointer to +// "darts.h:123: exception: error message". The message is available by using +// what() as well as that of `std::exception'. +#define DARTS_INT_TO_STR(value) #value +#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line) +#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__) +#define DARTS_THROW(msg) throw Darts::Details::Exception( \ + __FILE__ ":" DARTS_LINE_STR ": exception: " msg) + +namespace Darts { + +// The following namespace hides the internal types and classes. +namespace Details { + +// This header assumes that `int' and `unsigned int' are 32-bit integer types. +// +// Darts-clone keeps values associated with keys. The type of the values is +// `value_type'. Note that the values must be non-negative integers because the +// most significant bit (MSB) of each value is used to represent whether the +// corresponding unit is a leaf or not. Also, the keys are represented by +// sequences of `char_type's. `uchar_type' is the unsigned type of `char_type'. +typedef char char_type; +typedef unsigned char uchar_type; +typedef int value_type; + +// The main structure of Darts-clone is an array of `DoubleArrayUnit's, and the +// unit type is actually a wrapper of `id_type'. +typedef unsigned int id_type; + +// `progress_func_type' is the type of callback functions for reporting the +// progress of building a dictionary. See also build() of `DoubleArrayImpl'. +// The 1st argument receives the progress value and the 2nd argument receives +// the maximum progress value. A usage example is to show the progress +// percentage, 100.0 * (the 1st argument) / (the 2nd argument). +typedef int (*progress_func_type)(std::size_t, std::size_t); + +// `DoubleArrayUnit' is the type of double-array units and it is a wrapper of +// `id_type' in practice. +class DoubleArrayUnit { + public: + DoubleArrayUnit() : unit_() {} + + // has_leaf() returns whether a leaf unit is immediately derived from the + // unit (true) or not (false). 
+ bool has_leaf() const { + return ((unit_ >> 8) & 1) == 1; + } + // value() returns the value stored in the unit, and thus value() is + // available when and only when the unit is a leaf unit. + value_type value() const { + return static_cast(unit_ & ((1U << 31) - 1)); + } + + // label() returns the label associated with the unit. Note that a leaf unit + // always returns an invalid label. For this feature, leaf unit's label() + // returns an `id_type' that has the MSB of 1. + id_type label() const { + return unit_ & ((1U << 31) | 0xFF); + } + // offset() returns the offset from the unit to its derived units. + id_type offset() const { + return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6); + } + + private: + id_type unit_; + + // Copyable. +}; + +// Darts-clone throws an `Exception' for memory allocation failure, invalid +// arguments or a too large offset. The last case means that there are too many +// keys in the given set of keys. Note that the `msg' of `Exception' must be a +// constant or static string because an `Exception' keeps only a pointer to +// that string. +class Exception : public std::exception { + public: + explicit Exception(const char *msg = NULL) throw() : msg_(msg) {} + Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {} + virtual ~Exception() throw() {} + + // `Exception' overrides what() of `std::exception'. + virtual const char *what() const throw() { + return (msg_ != NULL) ? msg_ : ""; + } + + private: + const char *msg_; + + // Disallows operator=. + Exception &operator=(const Exception &); +}; + +} // namespace Details + +// `DoubleArrayImpl' is the interface of Darts-clone. Note that other +// classes should not be accessed from outside. +// +// `DoubleArrayImpl' has 4 template arguments but only the 3rd one is used as +// the type of values. Note that the given `T' is used only from outside, and +// the internal value type is not changed from `Details::value_type'. +// In build(), given values are cast from `T' to `Details::value_type' +// by using static_cast. On the other hand, values are cast from +// `Details::value_type' to `T' in searching dictionaries. 
+template +class DoubleArrayImpl { + public: + // Even if this `value_type' is changed, the internal value type is still + // `Details::value_type'. Other types, such as 64-bit integer types + // and floating-point number types, should not be used. + typedef T value_type; + // A key is represented by a sequence of `key_type's. For example, + // exactMatchSearch() takes a `const key_type *'. + typedef Details::char_type key_type; + // In searching dictionaries, the values associated with the matched keys are + // stored into or returned as `result_type's. + typedef value_type result_type; + + // `result_pair_type' enables applications to get the lengths of the matched + // keys in addition to the values. + struct result_pair_type { + value_type value; + std::size_t length; + }; + + // The constructor initializes member variables with 0 and NULLs. + DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {} + // The destructor frees memory allocated for units and then initializes + // member variables with 0 and NULLs. + virtual ~DoubleArrayImpl() { + clear(); + } + + // `DoubleArrayImpl' has 2 kinds of set_result()s. The 1st set_result() is to + // set a value to a `value_type'. The 2nd set_result() is to set a value and + // a length to a `result_pair_type'. By using set_result()s, search methods + // can return the 2 kinds of results in the same way. + // Why the set_result()s are non-static? It is for compatibility. + // + // The 1st set_result() takes a length as the 3rd argument but it is not + // used. If a compiler does a good job, codes for getting the length may be + // removed. + void set_result(value_type *result, value_type value, std::size_t) const { + *result = value; + } + // The 2nd set_result() uses both `value' and `length'. + void set_result(result_pair_type *result, + value_type value, std::size_t length) const { + result->value = value; + result->length = length; + } + + // set_array() calls clear() in order to free memory allocated to the old + // array and then sets a new array. This function is useful to set a memory- + // mapped array. 
Note that the array set by set_array() is not freed in + // clear() and the destructor of `DoubleArrayImpl'. + // set_array() can also set the size of the new array but the size is not + // used in search methods. So it works well even if the 2nd argument is 0 or + // omitted. Remember that size() and total_size() return 0 in such a case. + void set_array(const void *ptr, std::size_t size = 0) { + clear(); + array_ = static_cast(ptr); + size_ = size; + } + // array() returns a pointer to the array of units. + const void *array() const { + return array_; + } + + // clear() frees memory allocated to units and then initializes member + // variables with 0 and NULLs. Note that clear() does not free memory if the + // array of units was set by set_array(). In such a case, `array_' is not + // NULL and `buf_' is NULL. + void clear() { + size_ = 0; + array_ = NULL; + if (buf_ != NULL) { + delete[] buf_; + buf_ = NULL; + } + } + + // unit_size() returns the size of each unit. The size must be 4 bytes. + std::size_t unit_size() const { + return sizeof(unit_type); + } + // size() returns the number of units. It can be 0 if set_array() is used. + std::size_t size() const { + return size_; + } + // total_size() returns the number of bytes allocated to the array of units. + // It can be 0 if set_array() is used. + std::size_t total_size() const { + return unit_size() * size(); + } + // nonzero_size() exists for compatibility. It always returns the number of + // units because it takes long time to count the number of non-zero units. + std::size_t nonzero_size() const { + return size(); + } + + // build() constructs a dictionary from given key-value pairs. If `lengths' + // is NULL, `keys' is handled as an array of zero-terminated strings. If + // `values' is NULL, the index in `keys' is associated with each key, i.e. + // the ith key has (i - 1) as its value. + // Note that the key-value pairs must be arranged in key order and the values + // must not be negative. 
Also, if there are duplicate keys, only the first + // pair will be stored in the resultant dictionary. + // `progress_func' is a pointer to a callback function. If it is not NULL, + // it will be called in build() so that the caller can check the progress of + // dictionary construction. For details, please see the definition of + // `Details::progress_func_type'. + // The return value of build() is 0, and it indicates the success of the + // operation. Otherwise, build() throws a `Darts::Details::Exception', which is a + // derived class of `std::exception'. + // build() uses another construction algorithm if `values' is not NULL. In + // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead + // of a trie because a DAWG is likely to be more compact than a trie. + int build(std::size_t num_keys, const key_type * const *keys, + const std::size_t *lengths = NULL, const value_type *values = NULL, + Details::progress_func_type progress_func = NULL); + + // open() reads an array of units from the specified file. And if it goes + // well, the old array will be freed and replaced with the new array read + // from the file. `offset' specifies the number of bytes to be skipped before + // reading an array. `size' specifies the number of bytes to be read from the + // file. If the `size' is 0, the whole file will be read. + // open() returns 0 iff the operation succeeds. Otherwise, it returns a + // non-zero value or throws a `Darts::Details::Exception'. The exception is thrown + // when and only when a memory allocation fails. + int open(const char *file_name, const char *mode = "rb", + std::size_t offset = 0, std::size_t size = 0); + // save() writes the array of units into the specified file. `offset' + // specifies the number of bytes to be skipped before writing the array. + // save() returns 0 iff the operation succeeds. Otherwise, it returns a + // non-zero value. 
+ int save(const char *file_name, const char *mode = "wb", + std::size_t offset = 0) const; + + // The 1st exactMatchSearch() tests whether the given key exists or not, and + // if it exists, its value and length are set to `result'. Otherwise, the + // value and the length of `result' are set to -1 and 0 respectively. + // Note that if `length' is 0, `key' is handled as a zero-terminated string. + // `node_pos' specifies the start position of matching. This argument enables + // the combination of exactMatchSearch() and traverse(). For example, if you + // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get + // the node position corresponding to "xyz" and then you can use + // exactMatchSearch() to test "A", "BC", and "DE" from that position. + // Note that the length of `result' indicates the length from the `node_pos'. + // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }. + template + void exactMatchSearch(const key_type *key, U &result, + std::size_t length = 0, std::size_t node_pos = 0) const { + result = exactMatchSearch(key, length, node_pos); + } + // The 2nd exactMatchSearch() returns a result instead of updating the 2nd + // argument. So, the following exactMatchSearch() has only 3 arguments. + template + inline U exactMatchSearch(const key_type *key, std::size_t length = 0, + std::size_t node_pos = 0) const; + + // commonPrefixSearch() searches for keys which match a prefix of the given + // string. If `length' is 0, `key' is handled as a zero-terminated string. + // The values and the lengths of at most `max_num_results' matched keys are + // stored in `results'. commonPrefixSearch() returns the number of matched + // keys. Note that the return value can be larger than `max_num_results' if + // there are more than `max_num_results' matches. If you want to get all the + // results, allocate more spaces and call commonPrefixSearch() again. + // `node_pos' works as well as in exactMatchSearch(). 
+ template + inline std::size_t commonPrefixSearch(const key_type *key, U *results, + std::size_t max_num_results, std::size_t length = 0, + std::size_t node_pos = 0) const; + + // In Darts-clone, a dictionary is a deterministic finite-state automaton + // (DFA) and traverse() tests transitions on the DFA. The initial state is + // `node_pos' and traverse() chooses transitions labeled key[key_pos], + // key[key_pos + 1], ... in order. If there is not a transition labeled + // key[key_pos + i], traverse() terminates the transitions at that state and + // returns -2. Otherwise, traverse() ends without a termination and returns + // -1 or a nonnegative value, -1 indicates that the final state was not an + // accept state. When a nonnegative value is returned, it is the value + // associated with the final accept state. That is, traverse() returns the + // value associated with the given key if it exists. Note that traverse() + // updates `node_pos' and `key_pos' after each transition. + inline value_type traverse(const key_type *key, std::size_t &node_pos, + std::size_t &key_pos, std::size_t length = 0) const; + + private: + typedef Details::uchar_type uchar_type; + typedef Details::id_type id_type; + typedef Details::DoubleArrayUnit unit_type; + + std::size_t size_; + const unit_type *array_; + unit_type *buf_; + + // Disallows copy and assignment. + DoubleArrayImpl(const DoubleArrayImpl &); + DoubleArrayImpl &operator=(const DoubleArrayImpl &); +}; + +// is the typical instance of . It uses +// as the type of values and it is suitable for most cases. +typedef DoubleArrayImpl DoubleArray; + +// The interface section ends here. For using Darts-clone, there is no need +// to read the remaining section, which gives the implementation of +// Darts-clone. + +// +// Member functions of DoubleArrayImpl (except build()). 
+// + +template +int DoubleArrayImpl::open(const char *file_name, + const char *mode, std::size_t offset, std::size_t size) { +#ifdef _MSC_VER + std::FILE *file; + if (::fopen_s(&file, file_name, mode) != 0) { + return -1; + } +#else + std::FILE *file = std::fopen(file_name, mode); + if (file == NULL) { + return -1; + } +#endif + + if (size == 0) { + if (std::fseek(file, 0, SEEK_END) != 0) { + std::fclose(file); + return -1; + } + size = std::ftell(file) - offset; + } + + size /= unit_size(); + if (size < 256 || (size & 0xFF) != 0) { + std::fclose(file); + return -1; + } + + if (std::fseek(file, offset, SEEK_SET) != 0) { + std::fclose(file); + return -1; + } + + unit_type units[256]; + if (std::fread(units, unit_size(), 256, file) != 256) { + std::fclose(file); + return -1; + } + + if (units[0].label() != '\0' || units[0].has_leaf() || + units[0].offset() == 0 || units[0].offset() >= 512) { + std::fclose(file); + return -1; + } + for (id_type i = 1; i < 256; ++i) { + if (units[i].label() <= 0xFF && units[i].offset() >= size) { + std::fclose(file); + return -1; + } + } + + unit_type *buf; + try { + buf = new unit_type[size]; + for (id_type i = 0; i < 256; ++i) { + buf[i] = units[i]; + } + } catch (const std::bad_alloc &) { + std::fclose(file); + DARTS_THROW("failed to open double-array: std::bad_alloc"); + } + + if (size > 256) { + if (std::fread(buf + 256, unit_size(), size - 256, file) != size - 256) { + std::fclose(file); + delete[] buf; + return -1; + } + } + std::fclose(file); + + clear(); + + size_ = size; + array_ = buf; + buf_ = buf; + return 0; +} + +template +int DoubleArrayImpl::save(const char *file_name, + const char *mode, std::size_t offset) const { + if (size() == 0) { + return -1; + } + +#ifdef _MSC_VER + std::FILE *file; + if (::fopen_s(&file, file_name, mode) != 0) { + return -1; + } +#else + std::FILE *file = std::fopen(file_name, mode); + if (file == NULL) { + return -1; + } +#endif + + if (std::fseek(file, offset, SEEK_SET) != 0) { + 
std::fclose(file); + return -1; + } + + if (std::fwrite(array_, unit_size(), size(), file) != size()) { + std::fclose(file); + return -1; + } + std::fclose(file); + return 0; +} + +template +template +inline U DoubleArrayImpl::exactMatchSearch(const key_type *key, + std::size_t length, std::size_t node_pos) const { + U result; + set_result(&result, static_cast(-1), 0); + + unit_type unit = array_[node_pos]; + if (length != 0) { + for (std::size_t i = 0; i < length; ++i) { + node_pos ^= unit.offset() ^ static_cast(key[i]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[i])) { + return result; + } + } + } else { + for ( ; key[length] != '\0'; ++length) { + node_pos ^= unit.offset() ^ static_cast(key[length]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[length])) { + return result; + } + } + } + + if (!unit.has_leaf()) { + return result; + } + unit = array_[node_pos ^ unit.offset()]; + set_result(&result, static_cast(unit.value()), length); + return result; +} + +template +template +inline std::size_t DoubleArrayImpl::commonPrefixSearch( + const key_type *key, U *results, std::size_t max_num_results, + std::size_t length, std::size_t node_pos) const { + std::size_t num_results = 0; + + unit_type unit = array_[node_pos]; + node_pos ^= unit.offset(); + if (length != 0) { + for (std::size_t i = 0; i < length; ++i) { + node_pos ^= static_cast(key[i]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[i])) { + return num_results; + } + + node_pos ^= unit.offset(); + if (unit.has_leaf()) { + if (num_results < max_num_results) { + set_result(&results[num_results], static_cast( + array_[node_pos].value()), i + 1); + } + ++num_results; + } + } + } else { + for ( ; key[length] != '\0'; ++length) { + node_pos ^= static_cast(key[length]); + unit = array_[node_pos]; + if (unit.label() != static_cast(key[length])) { + return num_results; + } + + node_pos ^= unit.offset(); + if (unit.has_leaf()) { + if (num_results < 
max_num_results) { + set_result(&results[num_results], static_cast( + array_[node_pos].value()), length + 1); + } + ++num_results; + } + } + } + + return num_results; +} + +template +inline typename DoubleArrayImpl::value_type +DoubleArrayImpl::traverse(const key_type *key, + std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const { + id_type id = static_cast(node_pos); + unit_type unit = array_[id]; + + if (length != 0) { + for ( ; key_pos < length; ++key_pos) { + id ^= unit.offset() ^ static_cast(key[key_pos]); + unit = array_[id]; + if (unit.label() != static_cast(key[key_pos])) { + return static_cast(-2); + } + node_pos = id; + } + } else { + for ( ; key[key_pos] != '\0'; ++key_pos) { + id ^= unit.offset() ^ static_cast(key[key_pos]); + unit = array_[id]; + if (unit.label() != static_cast(key[key_pos])) { + return static_cast(-2); + } + node_pos = id; + } + } + + if (!unit.has_leaf()) { + return static_cast(-1); + } + unit = array_[id ^ unit.offset()]; + return static_cast(unit.value()); +} + +namespace Details { + +// +// Memory management of array. +// + +template +class AutoArray { + public: + explicit AutoArray(T *array = NULL) : array_(array) {} + ~AutoArray() { + clear(); + } + + const T &operator[](std::size_t id) const { + return array_[id]; + } + T &operator[](std::size_t id) { + return array_[id]; + } + + bool empty() const { + return array_ == NULL; + } + + void clear() { + if (array_ != NULL) { + delete[] array_; + array_ = NULL; + } + } + void swap(AutoArray *array) { + T *temp = array_; + array_ = array->array_; + array->array_ = temp; + } + void reset(T *array = NULL) { + AutoArray(array).swap(this); + } + + private: + T *array_; + + // Disallows copy and assignment. + AutoArray(const AutoArray &); + AutoArray &operator=(const AutoArray &); +}; + +// +// Memory management of resizable array. 
+// + +template +class AutoPool { + public: + AutoPool() : buf_(), size_(0), capacity_(0) {} + ~AutoPool() { clear(); } + + const T &operator[](std::size_t id) const { + return *(reinterpret_cast(&buf_[0]) + id); + } + T &operator[](std::size_t id) { + return *(reinterpret_cast(&buf_[0]) + id); + } + + bool empty() const { + return size_ == 0; + } + std::size_t size() const { + return size_; + } + + void clear() { + resize(0); + buf_.clear(); + size_ = 0; + capacity_ = 0; + } + + void push_back(const T &value) { + append(value); + } + void pop_back() { + (*this)[--size_].~T(); + } + + void append() { + if (size_ == capacity_) + resize_buf(size_ + 1); + new(&(*this)[size_++]) T; + } + void append(const T &value) { + if (size_ == capacity_) + resize_buf(size_ + 1); + new(&(*this)[size_++]) T(value); + } + + void resize(std::size_t size) { + while (size_ > size) { + (*this)[--size_].~T(); + } + if (size > capacity_) { + resize_buf(size); + } + while (size_ < size) { + new(&(*this)[size_++]) T; + } + } + void resize(std::size_t size, const T &value) { + while (size_ > size) { + (*this)[--size_].~T(); + } + if (size > capacity_) { + resize_buf(size); + } + while (size_ < size) { + new(&(*this)[size_++]) T(value); + } + } + + void reserve(std::size_t size) { + if (size > capacity_) { + resize_buf(size); + } + } + + private: + AutoArray buf_; + std::size_t size_; + std::size_t capacity_; + + // Disallows copy and assignment. 
+ AutoPool(const AutoPool &); + AutoPool &operator=(const AutoPool &); + + void resize_buf(std::size_t size); +}; + +template +void AutoPool::resize_buf(std::size_t size) { + std::size_t capacity; + if (size >= capacity_ * 2) { + capacity = size; + } else { + capacity = 1; + while (capacity < size) { + capacity <<= 1; + } + } + + AutoArray buf; + try { + buf.reset(new char[sizeof(T) * capacity]); + } catch (const std::bad_alloc &) { + DARTS_THROW("failed to resize pool: std::bad_alloc"); + } + + if (size_ > 0) { + T *src = reinterpret_cast(&buf_[0]); + T *dest = reinterpret_cast(&buf[0]); + for (std::size_t i = 0; i < size_; ++i) { + new(&dest[i]) T(src[i]); + src[i].~T(); + } + } + + buf_.swap(&buf); + capacity_ = capacity; +} + +// +// Memory management of stack. +// + +template +class AutoStack { + public: + AutoStack() : pool_() {} + ~AutoStack() { + clear(); + } + + const T &top() const { + return pool_[size() - 1]; + } + T &top() { + return pool_[size() - 1]; + } + + bool empty() const { + return pool_.empty(); + } + std::size_t size() const { + return pool_.size(); + } + + void push(const T &value) { + pool_.push_back(value); + } + void pop() { + pool_.pop_back(); + } + + void clear() { + pool_.clear(); + } + + private: + AutoPool pool_; + + // Disallows copy and assignment. + AutoStack(const AutoStack &); + AutoStack &operator=(const AutoStack &); +}; + +// +// Succinct bit vector. 
+// + +class BitVector { + public: + BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {} + ~BitVector() { + clear(); + } + + bool operator[](std::size_t id) const { + return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1; + } + + id_type rank(std::size_t id) const { + std::size_t unit_id = id / UNIT_SIZE; + return ranks_[unit_id] + pop_count(units_[unit_id] + & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1))); + } + + void set(std::size_t id, bool bit) { + if (bit) { + units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE); + } else { + units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE)); + } + } + + bool empty() const { + return units_.empty(); + } + std::size_t num_ones() const { + return num_ones_; + } + std::size_t size() const { + return size_; + } + + void append() { + if ((size_ % UNIT_SIZE) == 0) { + units_.append(0); + } + ++size_; + } + void build(); + + void clear() { + units_.clear(); + ranks_.clear(); + } + + private: + enum { UNIT_SIZE = sizeof(id_type) * 8 }; + + AutoPool units_; + AutoArray ranks_; + std::size_t num_ones_; + std::size_t size_; + + // Disallows copy and assignment. + BitVector(const BitVector &); + BitVector &operator=(const BitVector &); + + static id_type pop_count(id_type unit) { + unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555); + unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333); + unit = ((unit >> 4) + unit) & 0x0F0F0F0F; + unit += unit >> 8; + unit += unit >> 16; + return unit & 0xFF; + } +}; + +inline void BitVector::build() { + try { + ranks_.reset(new id_type[units_.size()]); + } catch (const std::bad_alloc &) { + DARTS_THROW("failed to build rank index: std::bad_alloc"); + } + + num_ones_ = 0; + for (std::size_t i = 0; i < units_.size(); ++i) { + ranks_[i] = num_ones_; + num_ones_ += pop_count(units_[i]); + } +} + +// +// Keyset. 
+// + +template +class Keyset { + public: + Keyset(std::size_t num_keys, const char_type * const *keys, + const std::size_t *lengths, const T *values) : + num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {} + + std::size_t num_keys() const { + return num_keys_; + } + const char_type *keys(std::size_t id) const { + return keys_[id]; + } + uchar_type keys(std::size_t key_id, std::size_t char_id) const { + if (has_lengths() && char_id >= lengths_[key_id]) + return '\0'; + return keys_[key_id][char_id]; + } + + bool has_lengths() const { + return lengths_ != NULL; + } + std::size_t lengths(std::size_t id) const { + if (has_lengths()) { + return lengths_[id]; + } + std::size_t length = 0; + while (keys_[id][length] != '\0') { + ++length; + } + return length; + } + + bool has_values() const { + return values_ != NULL; + } + const value_type values(std::size_t id) const { + if (has_values()) { + return static_cast(values_[id]); + } + return static_cast(id); + } + + private: + std::size_t num_keys_; + const char_type * const * keys_; + const std::size_t *lengths_; + const T *values_; + + // Disallows copy and assignment. + Keyset(const Keyset &); + Keyset &operator=(const Keyset &); +}; + +// +// Node of Directed Acyclic Word Graph (DAWG). 
+// + +class DawgNode { + public: + DawgNode() : child_(0), sibling_(0), label_('\0'), + is_state_(false), has_sibling_(false) {} + + void set_child(id_type child) { + child_ = child; + } + void set_sibling(id_type sibling) { + sibling_ = sibling; + } + void set_value(value_type value) { + child_ = value; + } + void set_label(uchar_type label) { + label_ = label; + } + void set_is_state(bool is_state) { + is_state_ = is_state; + } + void set_has_sibling(bool has_sibling) { + has_sibling_ = has_sibling; + } + + id_type child() const { + return child_; + } + id_type sibling() const { + return sibling_; + } + value_type value() const { + return static_cast(child_); + } + uchar_type label() const { + return label_; + } + bool is_state() const { + return is_state_; + } + bool has_sibling() const { + return has_sibling_; + } + + id_type unit() const { + if (label_ == '\0') { + return (child_ << 1) | (has_sibling_ ? 1 : 0); + } + return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0); + } + + private: + id_type child_; + id_type sibling_; + uchar_type label_; + bool is_state_; + bool has_sibling_; + + // Copyable. +}; + +// +// Fixed unit of Directed Acyclic Word Graph (DAWG). +// + +class DawgUnit { + public: + explicit DawgUnit(id_type unit = 0) : unit_(unit) {} + DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {} + + DawgUnit &operator=(id_type unit) { + unit_ = unit; + return *this; + } + + id_type unit() const { + return unit_; + } + + id_type child() const { + return unit_ >> 2; + } + bool has_sibling() const { + return (unit_ & 1) == 1; + } + value_type value() const { + return static_cast(unit_ >> 1); + } + bool is_state() const { + return (unit_ & 2) == 2; + } + + private: + id_type unit_; + + // Copyable. +}; + +// +// Directed Acyclic Word Graph (DAWG) builder. 
+// + +class DawgBuilder { + public: + DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(), + table_(), node_stack_(), recycle_bin_(), num_states_(0) {} + ~DawgBuilder() { + clear(); + } + + id_type root() const { + return 0; + } + + id_type child(id_type id) const { + return units_[id].child(); + } + id_type sibling(id_type id) const { + return units_[id].has_sibling() ? (id + 1) : 0; + } + int value(id_type id) const { + return units_[id].value(); + } + + bool is_leaf(id_type id) const { + return label(id) == '\0'; + } + uchar_type label(id_type id) const { + return labels_[id]; + } + + bool is_intersection(id_type id) const { + return is_intersections_[id]; + } + id_type intersection_id(id_type id) const { + return is_intersections_.rank(id) - 1; + } + + std::size_t num_intersections() const { + return is_intersections_.num_ones(); + } + + std::size_t size() const { + return units_.size(); + } + + void init(); + void finish(); + + void insert(const char *key, std::size_t length, value_type value); + + void clear(); + + private: + enum { INITIAL_TABLE_SIZE = 1 << 10 }; + + AutoPool nodes_; + AutoPool units_; + AutoPool labels_; + BitVector is_intersections_; + AutoPool table_; + AutoStack node_stack_; + AutoStack recycle_bin_; + std::size_t num_states_; + + // Disallows copy and assignment. 
+ DawgBuilder(const DawgBuilder &); + DawgBuilder &operator=(const DawgBuilder &); + + void flush(id_type id); + + void expand_table(); + + id_type find_unit(id_type id, id_type *hash_id) const; + id_type find_node(id_type node_id, id_type *hash_id) const; + + bool are_equal(id_type node_id, id_type unit_id) const; + + id_type hash_unit(id_type id) const; + id_type hash_node(id_type id) const; + + id_type append_node(); + id_type append_unit(); + + void free_node(id_type id) { + recycle_bin_.push(id); + } + + static id_type hash(id_type key) { + key = ~key + (key << 15); // key = (key << 15) - key - 1; + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key * 2057; // key = (key + (key << 3)) + (key << 11); + key = key ^ (key >> 16); + return key; + } +}; + +inline void DawgBuilder::init() { + table_.resize(INITIAL_TABLE_SIZE, 0); + + append_node(); + append_unit(); + + num_states_ = 1; + + nodes_[0].set_label(0xFF); + node_stack_.push(0); +} + +inline void DawgBuilder::finish() { + flush(0); + + units_[0] = nodes_[0].unit(); + labels_[0] = nodes_[0].label(); + + nodes_.clear(); + table_.clear(); + node_stack_.clear(); + recycle_bin_.clear(); + + is_intersections_.build(); +} + +inline void DawgBuilder::insert(const char *key, std::size_t length, + value_type value) { + if (value < 0) { + DARTS_THROW("failed to insert key: negative value"); + } else if (length == 0) { + DARTS_THROW("failed to insert key: zero-length key"); + } + + id_type id = 0; + std::size_t key_pos = 0; + + for ( ; key_pos <= length; ++key_pos) { + id_type child_id = nodes_[id].child(); + if (child_id == 0) { + break; + } + + uchar_type key_label = static_cast(key[key_pos]); + if (key_pos < length && key_label == '\0') { + DARTS_THROW("failed to insert key: invalid null character"); + } + + uchar_type unit_label = nodes_[child_id].label(); + if (key_label < unit_label) { + DARTS_THROW("failed to insert key: wrong key order"); + } else if (key_label > 
unit_label) { + nodes_[child_id].set_has_sibling(true); + flush(child_id); + break; + } + id = child_id; + } + + if (key_pos > length) { + return; + } + + for ( ; key_pos <= length; ++key_pos) { + uchar_type key_label = static_cast( + (key_pos < length) ? key[key_pos] : '\0'); + id_type child_id = append_node(); + + if (nodes_[id].child() == 0) { + nodes_[child_id].set_is_state(true); + } + nodes_[child_id].set_sibling(nodes_[id].child()); + nodes_[child_id].set_label(key_label); + nodes_[id].set_child(child_id); + node_stack_.push(child_id); + + id = child_id; + } + nodes_[id].set_value(value); +} + +inline void DawgBuilder::clear() { + nodes_.clear(); + units_.clear(); + labels_.clear(); + is_intersections_.clear(); + table_.clear(); + node_stack_.clear(); + recycle_bin_.clear(); + num_states_ = 0; +} + +inline void DawgBuilder::flush(id_type id) { + while (node_stack_.top() != id) { + id_type node_id = node_stack_.top(); + node_stack_.pop(); + + if (num_states_ >= table_.size() - (table_.size() >> 2)) { + expand_table(); + } + + id_type num_siblings = 0; + for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { + ++num_siblings; + } + + id_type hash_id; + id_type match_id = find_node(node_id, &hash_id); + if (match_id != 0) { + is_intersections_.set(match_id, true); + } else { + id_type unit_id = 0; + for (id_type i = 0; i < num_siblings; ++i) { + unit_id = append_unit(); + } + for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { + units_[unit_id] = nodes_[i].unit(); + labels_[unit_id] = nodes_[i].label(); + --unit_id; + } + match_id = unit_id + 1; + table_[hash_id] = match_id; + ++num_states_; + } + + for (id_type i = node_id, next; i != 0; i = next) { + next = nodes_[i].sibling(); + free_node(i); + } + + nodes_[node_stack_.top()].set_child(match_id); + } + node_stack_.pop(); +} + +inline void DawgBuilder::expand_table() { + std::size_t table_size = table_.size() << 1; + table_.clear(); + table_.resize(table_size, 0); + + for (std::size_t i = 1; i 
< units_.size(); ++i) { + id_type id = static_cast(i); + if (labels_[id] == '\0' || units_[id].is_state()) { + id_type hash_id; + find_unit(id, &hash_id); + table_[hash_id] = id; + } + } +} + +inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const { + *hash_id = hash_unit(id) % table_.size(); + for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { + id_type unit_id = table_[*hash_id]; + if (unit_id == 0) { + break; + } + + // There must not be the same unit. + } + return 0; +} + +inline id_type DawgBuilder::find_node(id_type node_id, + id_type *hash_id) const { + *hash_id = hash_node(node_id) % table_.size(); + for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { + id_type unit_id = table_[*hash_id]; + if (unit_id == 0) { + break; + } + + if (are_equal(node_id, unit_id)) { + return unit_id; + } + } + return 0; +} + +inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const { + for (id_type i = nodes_[node_id].sibling(); i != 0; + i = nodes_[i].sibling()) { + if (units_[unit_id].has_sibling() == false) { + return false; + } + ++unit_id; + } + if (units_[unit_id].has_sibling() == true) { + return false; + } + + for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) { + if (nodes_[i].unit() != units_[unit_id].unit() || + nodes_[i].label() != labels_[unit_id]) { + return false; + } + } + return true; +} + +inline id_type DawgBuilder::hash_unit(id_type id) const { + id_type hash_value = 0; + for ( ; id != 0; ++id) { + id_type unit = units_[id].unit(); + uchar_type label = labels_[id]; + hash_value ^= hash((label << 24) ^ unit); + + if (units_[id].has_sibling() == false) { + break; + } + } + return hash_value; +} + +inline id_type DawgBuilder::hash_node(id_type id) const { + id_type hash_value = 0; + for ( ; id != 0; id = nodes_[id].sibling()) { + id_type unit = nodes_[id].unit(); + uchar_type label = nodes_[id].label(); + hash_value ^= hash((label << 24) ^ unit); + } + return hash_value; +} + +inline id_type 
DawgBuilder::append_unit() { + is_intersections_.append(); + units_.append(); + labels_.append(); + + return static_cast(is_intersections_.size() - 1); +} + +inline id_type DawgBuilder::append_node() { + id_type id; + if (recycle_bin_.empty()) { + id = static_cast(nodes_.size()); + nodes_.append(); + } else { + id = recycle_bin_.top(); + nodes_[id] = DawgNode(); + recycle_bin_.pop(); + } + return id; +} + +// +// Unit of double-array builder. +// + +class DoubleArrayBuilderUnit { + public: + DoubleArrayBuilderUnit() : unit_(0) {} + + void set_has_leaf(bool has_leaf) { + if (has_leaf) { + unit_ |= 1U << 8; + } else { + unit_ &= ~(1U << 8); + } + } + void set_value(value_type value) { + unit_ = value | (1U << 31); + } + void set_label(uchar_type label) { + unit_ = (unit_ & ~0xFFU) | label; + } + void set_offset(id_type offset) { + if (offset >= 1U << 29) { + DARTS_THROW("failed to modify unit: too large offset"); + } + unit_ &= (1U << 31) | (1U << 8) | 0xFF; + if (offset < 1U << 21) { + unit_ |= (offset << 10); + } else { + unit_ |= (offset << 2) | (1U << 9); + } + } + + private: + id_type unit_; + + // Copyable. +}; + +// +// Extra unit of double-array builder. +// + +class DoubleArrayBuilderExtraUnit { + public: + DoubleArrayBuilderExtraUnit() : prev_(0), next_(0), + is_fixed_(false), is_used_(false) {} + + void set_prev(id_type prev) { + prev_ = prev; + } + void set_next(id_type next) { + next_ = next; + } + void set_is_fixed(bool is_fixed) { + is_fixed_ = is_fixed; + } + void set_is_used(bool is_used) { + is_used_ = is_used; + } + + id_type prev() const { + return prev_; + } + id_type next() const { + return next_; + } + bool is_fixed() const { + return is_fixed_; + } + bool is_used() const { + return is_used_; + } + + private: + id_type prev_; + id_type next_; + bool is_fixed_; + bool is_used_; + + // Copyable. +}; + +// +// DAWG -> double-array converter. 
+// + +class DoubleArrayBuilder { + public: + explicit DoubleArrayBuilder(progress_func_type progress_func) + : progress_func_(progress_func), units_(), extras_(), labels_(), + table_(), extras_head_(0) {} + ~DoubleArrayBuilder() { + clear(); + } + + template + void build(const Keyset &keyset); + void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const; + + void clear(); + + private: + enum { BLOCK_SIZE = 256 }; + enum { NUM_EXTRA_BLOCKS = 16 }; + enum { NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS }; + + enum { UPPER_MASK = 0xFF << 21 }; + enum { LOWER_MASK = 0xFF }; + + typedef DoubleArrayBuilderUnit unit_type; + typedef DoubleArrayBuilderExtraUnit extra_type; + + progress_func_type progress_func_; + AutoPool units_; + AutoArray extras_; + AutoPool labels_; + AutoArray table_; + id_type extras_head_; + + // Disallows copy and assignment. + DoubleArrayBuilder(const DoubleArrayBuilder &); + DoubleArrayBuilder &operator=(const DoubleArrayBuilder &); + + std::size_t num_blocks() const { + return units_.size() / BLOCK_SIZE; + } + + const extra_type &extras(id_type id) const { + return extras_[id % NUM_EXTRAS]; + } + extra_type &extras(id_type id) { + return extras_[id % NUM_EXTRAS]; + } + + template + void build_dawg(const Keyset &keyset, DawgBuilder *dawg_builder); + void build_from_dawg(const DawgBuilder &dawg); + void build_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id); + id_type arrange_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id); + + template + void build_from_keyset(const Keyset &keyset); + template + void build_from_keyset(const Keyset &keyset, std::size_t begin, + std::size_t end, std::size_t depth, id_type dic_id); + template + id_type arrange_from_keyset(const Keyset &keyset, std::size_t begin, + std::size_t end, std::size_t depth, id_type dic_id); + + id_type find_valid_offset(id_type id) const; + bool is_valid_offset(id_type id, id_type offset) const; + + void reserve_id(id_type id); + void 
expand_units(); + + void fix_all_blocks(); + void fix_block(id_type block_id); +}; + +template +void DoubleArrayBuilder::build(const Keyset &keyset) { + if (keyset.has_values()) { + Details::DawgBuilder dawg_builder; + build_dawg(keyset, &dawg_builder); + build_from_dawg(dawg_builder); + dawg_builder.clear(); + } else { + build_from_keyset(keyset); + } +} + +inline void DoubleArrayBuilder::copy(std::size_t *size_ptr, + DoubleArrayUnit **buf_ptr) const { + if (size_ptr != NULL) { + *size_ptr = units_.size(); + } + if (buf_ptr != NULL) { + *buf_ptr = new DoubleArrayUnit[units_.size()]; + unit_type *units = reinterpret_cast(*buf_ptr); + for (std::size_t i = 0; i < units_.size(); ++i) { + units[i] = units_[i]; + } + } +} + +inline void DoubleArrayBuilder::clear() { + units_.clear(); + extras_.clear(); + labels_.clear(); + table_.clear(); + extras_head_ = 0; +} + +template +void DoubleArrayBuilder::build_dawg(const Keyset &keyset, + DawgBuilder *dawg_builder) { + dawg_builder->init(); + for (std::size_t i = 0; i < keyset.num_keys(); ++i) { + dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i)); + if (progress_func_ != NULL) { + progress_func_(i + 1, keyset.num_keys() + 1); + } + } + dawg_builder->finish(); +} + +inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) { + std::size_t num_units = 1; + while (num_units < dawg.size()) { + num_units <<= 1; + } + units_.reserve(num_units); + + table_.reset(new id_type[dawg.num_intersections()]); + for (std::size_t i = 0; i < dawg.num_intersections(); ++i) { + table_[i] = 0; + } + + extras_.reset(new extra_type[NUM_EXTRAS]); + + reserve_id(0); + extras(0).set_is_used(true); + units_[0].set_offset(1); + units_[0].set_label('\0'); + + if (dawg.child(dawg.root()) != 0) { + build_from_dawg(dawg, dawg.root(), 0); + } + + fix_all_blocks(); + + extras_.clear(); + labels_.clear(); + table_.clear(); +} + +inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, 
id_type dic_id) { + id_type dawg_child_id = dawg.child(dawg_id); + if (dawg.is_intersection(dawg_child_id)) { + id_type intersection_id = dawg.intersection_id(dawg_child_id); + id_type offset = table_[intersection_id]; + if (offset != 0) { + offset ^= dic_id; + if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) { + if (dawg.is_leaf(dawg_child_id)) { + units_[dic_id].set_has_leaf(true); + } + units_[dic_id].set_offset(offset); + return; + } + } + } + + id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id); + if (dawg.is_intersection(dawg_child_id)) { + table_[dawg.intersection_id(dawg_child_id)] = offset; + } + + do { + uchar_type child_label = dawg.label(dawg_child_id); + id_type dic_child_id = offset ^ child_label; + if (child_label != '\0') { + build_from_dawg(dawg, dawg_child_id, dic_child_id); + } + dawg_child_id = dawg.sibling(dawg_child_id); + } while (dawg_child_id != 0); +} + +inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg, + id_type dawg_id, id_type dic_id) { + labels_.resize(0); + + id_type dawg_child_id = dawg.child(dawg_id); + while (dawg_child_id != 0) { + labels_.append(dawg.label(dawg_child_id)); + dawg_child_id = dawg.sibling(dawg_child_id); + } + + id_type offset = find_valid_offset(dic_id); + units_[dic_id].set_offset(dic_id ^ offset); + + dawg_child_id = dawg.child(dawg_id); + for (std::size_t i = 0; i < labels_.size(); ++i) { + id_type dic_child_id = offset ^ labels_[i]; + reserve_id(dic_child_id); + + if (dawg.is_leaf(dawg_child_id)) { + units_[dic_id].set_has_leaf(true); + units_[dic_child_id].set_value(dawg.value(dawg_child_id)); + } else { + units_[dic_child_id].set_label(labels_[i]); + } + + dawg_child_id = dawg.sibling(dawg_child_id); + } + extras(offset).set_is_used(true); + + return offset; +} + +template +void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset) { + std::size_t num_units = 1; + while (num_units < keyset.num_keys()) { + num_units <<= 1; + } + units_.reserve(num_units); + + 
extras_.reset(new extra_type[NUM_EXTRAS]); + + reserve_id(0); + extras(0).set_is_used(true); + units_[0].set_offset(1); + units_[0].set_label('\0'); + + if (keyset.num_keys() > 0) { + build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0); + } + + fix_all_blocks(); + + extras_.clear(); + labels_.clear(); +} + +template +void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset, + std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { + id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id); + + while (begin < end) { + if (keyset.keys(begin, depth) != '\0') { + break; + } + ++begin; + } + if (begin == end) { + return; + } + + std::size_t last_begin = begin; + uchar_type last_label = keyset.keys(begin, depth); + while (++begin < end) { + uchar_type label = keyset.keys(begin, depth); + if (label != last_label) { + build_from_keyset(keyset, last_begin, begin, + depth + 1, offset ^ last_label); + last_begin = begin; + last_label = keyset.keys(begin, depth); + } + } + build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label); +} + +template +id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset &keyset, + std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { + labels_.resize(0); + + value_type value = -1; + for (std::size_t i = begin; i < end; ++i) { + uchar_type label = keyset.keys(i, depth); + if (label == '\0') { + if (keyset.has_lengths() && depth < keyset.lengths(i)) { + DARTS_THROW("failed to build double-array: " + "invalid null character"); + } else if (keyset.values(i) < 0) { + DARTS_THROW("failed to build double-array: negative value"); + } + + if (value == -1) { + value = keyset.values(i); + } + if (progress_func_ != NULL) { + progress_func_(i + 1, keyset.num_keys() + 1); + } + } + + if (labels_.empty()) { + labels_.append(label); + } else if (label != labels_[labels_.size() - 1]) { + if (label < labels_[labels_.size() - 1]) { + DARTS_THROW("failed to build double-array: wrong 
key order"); + } + labels_.append(label); + } + } + + id_type offset = find_valid_offset(dic_id); + units_[dic_id].set_offset(dic_id ^ offset); + + for (std::size_t i = 0; i < labels_.size(); ++i) { + id_type dic_child_id = offset ^ labels_[i]; + reserve_id(dic_child_id); + if (labels_[i] == '\0') { + units_[dic_id].set_has_leaf(true); + units_[dic_child_id].set_value(value); + } else { + units_[dic_child_id].set_label(labels_[i]); + } + } + extras(offset).set_is_used(true); + + return offset; +} + +inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const { + if (extras_head_ >= units_.size()) { + return units_.size() | (id & LOWER_MASK); + } + + id_type unfixed_id = extras_head_; + do { + id_type offset = unfixed_id ^ labels_[0]; + if (is_valid_offset(id, offset)) { + return offset; + } + unfixed_id = extras(unfixed_id).next(); + } while (unfixed_id != extras_head_); + + return units_.size() | (id & LOWER_MASK); +} + +inline bool DoubleArrayBuilder::is_valid_offset(id_type id, + id_type offset) const { + if (extras(offset).is_used()) { + return false; + } + + id_type rel_offset = id ^ offset; + if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) { + return false; + } + + for (std::size_t i = 1; i < labels_.size(); ++i) { + if (extras(offset ^ labels_[i]).is_fixed()) { + return false; + } + } + + return true; +} + +inline void DoubleArrayBuilder::reserve_id(id_type id) { + if (id >= units_.size()) { + expand_units(); + } + + if (id == extras_head_) { + extras_head_ = extras(id).next(); + if (extras_head_ == id) { + extras_head_ = units_.size(); + } + } + extras(extras(id).prev()).set_next(extras(id).next()); + extras(extras(id).next()).set_prev(extras(id).prev()); + extras(id).set_is_fixed(true); +} + +inline void DoubleArrayBuilder::expand_units() { + id_type src_num_units = units_.size(); + id_type src_num_blocks = num_blocks(); + + id_type dest_num_units = src_num_units + BLOCK_SIZE; + id_type dest_num_blocks = src_num_blocks + 1; + + if 
(dest_num_blocks > NUM_EXTRA_BLOCKS) { + fix_block(src_num_blocks - NUM_EXTRA_BLOCKS); + } + + units_.resize(dest_num_units); + + if (dest_num_blocks > NUM_EXTRA_BLOCKS) { + for (std::size_t id = src_num_units; id < dest_num_units; ++id) { + extras(id).set_is_used(false); + extras(id).set_is_fixed(false); + } + } + + for (id_type i = src_num_units + 1; i < dest_num_units; ++i) { + extras(i - 1).set_next(i); + extras(i).set_prev(i - 1); + } + + extras(src_num_units).set_prev(dest_num_units - 1); + extras(dest_num_units - 1).set_next(src_num_units); + + extras(src_num_units).set_prev(extras(extras_head_).prev()); + extras(dest_num_units - 1).set_next(extras_head_); + + extras(extras(extras_head_).prev()).set_next(src_num_units); + extras(extras_head_).set_prev(dest_num_units - 1); +} + +inline void DoubleArrayBuilder::fix_all_blocks() { + id_type begin = 0; + if (num_blocks() > NUM_EXTRA_BLOCKS) { + begin = num_blocks() - NUM_EXTRA_BLOCKS; + } + id_type end = num_blocks(); + + for (id_type block_id = begin; block_id != end; ++block_id) { + fix_block(block_id); + } +} + +inline void DoubleArrayBuilder::fix_block(id_type block_id) { + id_type begin = block_id * BLOCK_SIZE; + id_type end = begin + BLOCK_SIZE; + + id_type unused_offset = 0; + for (id_type offset = begin; offset != end; ++offset) { + if (!extras(offset).is_used()) { + unused_offset = offset; + break; + } + } + + for (id_type id = begin; id != end; ++id) { + if (!extras(id).is_fixed()) { + reserve_id(id); + units_[id].set_label(static_cast(id ^ unused_offset)); + } + } +} + +} // namespace Details + +// +// Member function build() of DoubleArrayImpl. 
+// + +template +int DoubleArrayImpl::build(std::size_t num_keys, + const key_type * const *keys, const std::size_t *lengths, + const value_type *values, Details::progress_func_type progress_func) { + Details::Keyset keyset(num_keys, keys, lengths, values); + + Details::DoubleArrayBuilder builder(progress_func); + builder.build(keyset); + + std::size_t size = 0; + unit_type *buf = NULL; + builder.copy(&size, &buf); + + clear(); + + size_ = size; + array_ = buf; + buf_ = buf; + + if (progress_func != NULL) { + progress_func(num_keys + 1, num_keys + 1); + } + + return 0; +} + +} // namespace Darts + +#undef DARTS_INT_TO_STR +#undef DARTS_LINE_TO_STR +#undef DARTS_LINE_STR +#undef DARTS_THROW + +#endif // DARTS_H_ diff --git a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp index c6c55d5..ba3abe0 100644 --- a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp +++ b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ /************************************ * file enc : ascii * author : wuyanyi09@gmail.com @@ -33,54 +15,54 @@ namespace limonp { using namespace std; class ArgvContext { -public : - ArgvContext(int argc, const char* const * argv) { - for(int i = 0; i < argc; i++) { - if(StartsWith(argv[i], "-")) { - if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) { - mpss_[argv[i]] = argv[i + 1]; - i++; - } else { - sset_.insert(argv[i]); - } - } else { - args_.push_back(argv[i]); - } + public : + ArgvContext(int argc, const char* const * argv) { + for(int i = 0; i < argc; i++) { + if(StartsWith(argv[i], "-")) { + if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) { + mpss_[argv[i]] = argv[i+1]; + i++; + } else { + sset_.insert(argv[i]); } + } else { + args_.push_back(argv[i]); + } } - ~ArgvContext() { - } + } + ~ArgvContext() { + } - friend ostream& operator << (ostream& os, const ArgvContext& args); - string operator [](size_t i) const { - if(i < args_.size()) { - return args_[i]; - } - return ""; + friend ostream& operator << (ostream& os, const ArgvContext& args); + string operator [](size_t i) const { + if(i < args_.size()) { + return args_[i]; } - string operator [](const string& key) const { - map::const_iterator it = mpss_.find(key); - if(it != mpss_.end()) { - return it->second; - } - return ""; + return ""; + } + string operator [](const string& key) const { + map::const_iterator it = mpss_.find(key); + if(it != mpss_.end()) { + return it->second; } + return ""; + } - bool HasKey(const string& key) const { - if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) { - return true; - } - return false; + bool HasKey(const string& key) const { + if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) { + return true; } + return false; + } -private: - vector args_; - map mpss_; - set sset_; + private: + vector args_; + map mpss_; + set sset_; }; // class ArgvContext inline ostream& operator << (ostream& os, const ArgvContext& args) { - 
return os << args.args_ << args.mpss_ << args.sset_; + return os<. - * - * - */ #ifndef LIMONP_BLOCKINGQUEUE_HPP #define LIMONP_BLOCKINGQUEUE_HPP @@ -25,41 +7,41 @@ namespace limonp { template class BlockingQueue: NonCopyable { -public: - BlockingQueue() - : mutex_(), notEmpty_(mutex_), queue_() { - } + public: + BlockingQueue() + : mutex_(), notEmpty_(mutex_), queue_() { + } - void Push(const T& x) { - MutexLockGuard lock(mutex_); - queue_.push(x); - notEmpty_.Notify(); // Wait morphing saves us - } + void Push(const T& x) { + MutexLockGuard lock(mutex_); + queue_.push(x); + notEmpty_.Notify(); // Wait morphing saves us + } - T Pop() { - MutexLockGuard lock(mutex_); - // always use a while-loop, due to spurious wakeup - while(queue_.empty()) { - notEmpty_.Wait(); - } - assert(!queue_.empty()); - T front(queue_.front()); - queue_.pop(); - return front; + T Pop() { + MutexLockGuard lock(mutex_); + // always use a while-loop, due to spurious wakeup + while (queue_.empty()) { + notEmpty_.Wait(); } + assert(!queue_.empty()); + T front(queue_.front()); + queue_.pop(); + return front; + } - size_t Size() const { - MutexLockGuard lock(mutex_); - return queue_.size(); - } - bool Empty() const { - return Size() == 0; - } + size_t Size() const { + MutexLockGuard lock(mutex_); + return queue_.size(); + } + bool Empty() const { + return Size() == 0; + } -private: - mutable MutexLock mutex_; - Condition notEmpty_; - std::queue queue_; + private: + mutable MutexLock mutex_; + Condition notEmpty_; + std::queue queue_; }; // class BlockingQueue } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp index 41a5ef9..598d099 100644 --- a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp +++ b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP #define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP @@ -25,59 +7,59 @@ namespace limonp { template class BoundedBlockingQueue : NonCopyable { -public: - explicit BoundedBlockingQueue(size_t maxSize) - : mutex_(), - notEmpty_(mutex_), - notFull_(mutex_), - queue_(maxSize) { - } + public: + explicit BoundedBlockingQueue(size_t maxSize) + : mutex_(), + notEmpty_(mutex_), + notFull_(mutex_), + queue_(maxSize) { + } - void Push(const T& x) { - MutexLockGuard lock(mutex_); - while(queue_.Full()) { - notFull_.Wait(); - } - assert(!queue_.Full()); - queue_.Push(x); - notEmpty_.Notify(); + void Push(const T& x) { + MutexLockGuard lock(mutex_); + while (queue_.Full()) { + notFull_.Wait(); } + assert(!queue_.Full()); + queue_.Push(x); + notEmpty_.Notify(); + } - T Pop() { - MutexLockGuard lock(mutex_); - while(queue_.Empty()) { - notEmpty_.Wait(); - } - assert(!queue_.Empty()); - T res = queue_.Pop(); - notFull_.Notify(); - return res; + T Pop() { + MutexLockGuard lock(mutex_); + while (queue_.Empty()) { + notEmpty_.Wait(); } + assert(!queue_.Empty()); + T res = queue_.Pop(); + notFull_.Notify(); + return res; + } - bool Empty() const { - MutexLockGuard lock(mutex_); - return queue_.Empty(); - } + bool Empty() const { + MutexLockGuard lock(mutex_); + return queue_.Empty(); + } - bool Full() const { - 
MutexLockGuard lock(mutex_); - return queue_.Full(); - } + bool Full() const { + MutexLockGuard lock(mutex_); + return queue_.Full(); + } - size_t size() const { - MutexLockGuard lock(mutex_); - return queue_.size(); - } + size_t size() const { + MutexLockGuard lock(mutex_); + return queue_.size(); + } - size_t capacity() const { - return queue_.capacity(); - } + size_t capacity() const { + return queue_.capacity(); + } -private: - mutable MutexLock mutex_; - Condition notEmpty_; - Condition notFull_; - BoundedQueue queue_; + private: + mutable MutexLock mutex_; + Condition notEmpty_; + Condition notFull_; + BoundedQueue queue_; }; // class BoundedBlockingQueue } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp index ef86e26..f52a107 100644 --- a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp +++ b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ #ifndef LIMONP_BOUNDED_QUEUE_HPP #define LIMONP_BOUNDED_QUEUE_HPP @@ -27,55 +9,55 @@ namespace limonp { using namespace std; template class BoundedQueue { -public: - explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) { - head_ = 0; - tail_ = 0; - size_ = 0; - assert(capacity_); - } - ~BoundedQueue() { - } + public: + explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) { + head_ = 0; + tail_ = 0; + size_ = 0; + assert(capacity_); + } + ~BoundedQueue() { + } - void Clear() { - head_ = 0; - tail_ = 0; - size_ = 0; - } - bool Empty() const { - return !size_; - } - bool Full() const { - return capacity_ == size_; - } - size_t Size() const { - return size_; - } - size_t Capacity() const { - return capacity_; - } + void Clear() { + head_ = 0; + tail_ = 0; + size_ = 0; + } + bool Empty() const { + return !size_; + } + bool Full() const { + return capacity_ == size_; + } + size_t Size() const { + return size_; + } + size_t Capacity() const { + return capacity_; + } - void Push(const T& t) { - assert(!Full()); - circular_buffer_[tail_] = t; - tail_ = (tail_ + 1) % capacity_; - size_ ++; - } + void Push(const T& t) { + assert(!Full()); + circular_buffer_[tail_] = t; + tail_ = (tail_ + 1) % capacity_; + size_ ++; + } - T Pop() { - assert(!Empty()); - size_t oldPos = head_; - head_ = (head_ + 1) % capacity_; - size_ --; - return circular_buffer_[oldPos]; - } + T Pop() { + assert(!Empty()); + size_t oldPos = head_; + head_ = (head_ + 1) % capacity_; + size_ --; + return circular_buffer_[oldPos]; + } -private: - size_t head_; - size_t tail_; - size_t size_; - const size_t capacity_; - vector circular_buffer_; + private: + size_t head_; + size_t tail_; + size_t size_; + const size_t capacity_; + vector circular_buffer_; }; // class BoundedQueue } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/Closure.hpp b/libchinese-segmentation/cppjieba/limonp/Closure.hpp index 
9fab60f..c9d9dd4 100644 --- a/libchinese-segmentation/cppjieba/limonp/Closure.hpp +++ b/libchinese-segmentation/cppjieba/limonp/Closure.hpp @@ -1,222 +1,204 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_CLOSURE_HPP #define LIMONP_CLOSURE_HPP namespace limonp { class ClosureInterface { -public: - virtual ~ClosureInterface() { - } - virtual void Run() = 0; + public: + virtual ~ClosureInterface() { + } + virtual void Run() = 0; }; template class Closure0: public ClosureInterface { -public: - Closure0(Funct fun) { - fun_ = fun; - } - virtual ~Closure0() { - } - virtual void Run() { - (*fun_)(); - } -private: - Funct fun_; -}; + public: + Closure0(Funct fun) { + fun_ = fun; + } + virtual ~Closure0() { + } + virtual void Run() { + (*fun_)(); + } + private: + Funct fun_; +}; template class Closure1: public ClosureInterface { -public: - Closure1(Funct fun, Arg1 arg1) { - fun_ = fun; - arg1_ = arg1; - } - virtual ~Closure1() { - } - virtual void Run() { - (*fun_)(arg1_); - } -private: - Funct fun_; - Arg1 arg1_; -}; + public: + Closure1(Funct fun, Arg1 arg1) { + fun_ = fun; + arg1_ = arg1; + } + virtual ~Closure1() { + } + virtual void Run() { + (*fun_)(arg1_); + } + private: + Funct fun_; + Arg1 arg1_; +}; template class Closure2: public ClosureInterface { -public: - Closure2(Funct fun, Arg1 arg1, Arg2 arg2) { - fun_ = 
fun; - arg1_ = arg1; - arg2_ = arg2; - } - virtual ~Closure2() { - } - virtual void Run() { - (*fun_)(arg1_, arg2_); - } -private: - Funct fun_; - Arg1 arg1_; - Arg2 arg2_; -}; + public: + Closure2(Funct fun, Arg1 arg1, Arg2 arg2) { + fun_ = fun; + arg1_ = arg1; + arg2_ = arg2; + } + virtual ~Closure2() { + } + virtual void Run() { + (*fun_)(arg1_, arg2_); + } + private: + Funct fun_; + Arg1 arg1_; + Arg2 arg2_; +}; template class Closure3: public ClosureInterface { -public: - Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) { - fun_ = fun; - arg1_ = arg1; - arg2_ = arg2; - arg3_ = arg3; - } - virtual ~Closure3() { - } - virtual void Run() { - (*fun_)(arg1_, arg2_, arg3_); - } -private: - Funct fun_; - Arg1 arg1_; - Arg2 arg2_; - Arg3 arg3_; -}; + public: + Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) { + fun_ = fun; + arg1_ = arg1; + arg2_ = arg2; + arg3_ = arg3; + } + virtual ~Closure3() { + } + virtual void Run() { + (*fun_)(arg1_, arg2_, arg3_); + } + private: + Funct fun_; + Arg1 arg1_; + Arg2 arg2_; + Arg3 arg3_; +}; -template +template class ObjClosure0: public ClosureInterface { -public: - ObjClosure0(Obj* p, Funct fun) { - p_ = p; - fun_ = fun; - } - virtual ~ObjClosure0() { - } - virtual void Run() { - (p_->*fun_)(); - } -private: - Obj* p_; - Funct fun_; -}; + public: + ObjClosure0(Obj* p, Funct fun) { + p_ = p; + fun_ = fun; + } + virtual ~ObjClosure0() { + } + virtual void Run() { + (p_->*fun_)(); + } + private: + Obj* p_; + Funct fun_; +}; -template +template class ObjClosure1: public ClosureInterface { -public: - ObjClosure1(Obj* p, Funct fun, Arg1 arg1) { - p_ = p; - fun_ = fun; - arg1_ = arg1; - } - virtual ~ObjClosure1() { - } - virtual void Run() { - (p_->*fun_)(arg1_); - } -private: - Obj* p_; - Funct fun_; - Arg1 arg1_; -}; + public: + ObjClosure1(Obj* p, Funct fun, Arg1 arg1) { + p_ = p; + fun_ = fun; + arg1_ = arg1; + } + virtual ~ObjClosure1() { + } + virtual void Run() { + (p_->*fun_)(arg1_); + } + private: + Obj* p_; + Funct 
fun_; + Arg1 arg1_; +}; -template +template class ObjClosure2: public ClosureInterface { -public: - ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) { - p_ = p; - fun_ = fun; - arg1_ = arg1; - arg2_ = arg2; - } - virtual ~ObjClosure2() { - } - virtual void Run() { - (p_->*fun_)(arg1_, arg2_); - } -private: - Obj* p_; - Funct fun_; - Arg1 arg1_; - Arg2 arg2_; -}; -template + public: + ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) { + p_ = p; + fun_ = fun; + arg1_ = arg1; + arg2_ = arg2; + } + virtual ~ObjClosure2() { + } + virtual void Run() { + (p_->*fun_)(arg1_, arg2_); + } + private: + Obj* p_; + Funct fun_; + Arg1 arg1_; + Arg2 arg2_; +}; +template class ObjClosure3: public ClosureInterface { -public: - ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) { - p_ = p; - fun_ = fun; - arg1_ = arg1; - arg2_ = arg2; - arg3_ = arg3; - } - virtual ~ObjClosure3() { - } - virtual void Run() { - (p_->*fun_)(arg1_, arg2_, arg3_); - } -private: - Obj* p_; - Funct fun_; - Arg1 arg1_; - Arg2 arg2_; - Arg3 arg3_; -}; + public: + ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) { + p_ = p; + fun_ = fun; + arg1_ = arg1; + arg2_ = arg2; + arg3_ = arg3; + } + virtual ~ObjClosure3() { + } + virtual void Run() { + (p_->*fun_)(arg1_, arg2_, arg3_); + } + private: + Obj* p_; + Funct fun_; + Arg1 arg1_; + Arg2 arg2_; + Arg3 arg3_; +}; template -ClosureInterface* NewClosure(R(*fun)()) { - return new Closure0(fun); +ClosureInterface* NewClosure(R (*fun)()) { + return new Closure0(fun); } template -ClosureInterface* NewClosure(R(*fun)(Arg1), Arg1 arg1) { - return new Closure1(fun, arg1); +ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) { + return new Closure1(fun, arg1); } template -ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { - return new Closure2(fun, arg1, arg2); +ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { + return new Closure2(fun, arg1, arg2); } template -ClosureInterface* 
NewClosure(R(*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { - return new Closure3(fun, arg1, arg2, arg3); +ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { + return new Closure3(fun, arg1, arg2, arg3); } template -ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)()) { - return new ObjClosure0(obj, fun); +ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) { + return new ObjClosure0(obj, fun); } template -ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1), Arg1 arg1) { - return new ObjClosure1(obj, fun, arg1); +ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) { + return new ObjClosure1(obj, fun, arg1); } template -ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { - return new ObjClosure2(obj, fun, arg1, arg2); +ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { + return new ObjClosure2(obj, fun, arg1, arg2); } template -ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { - return new ObjClosure3(obj, fun, arg1, arg2, arg3); +ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { + return new ObjClosure3(obj, fun, arg1, arg2, arg3); } } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/Colors.hpp b/libchinese-segmentation/cppjieba/limonp/Colors.hpp index e097921..04edd7e 100644 --- a/libchinese-segmentation/cppjieba/limonp/Colors.hpp +++ b/libchinese-segmentation/cppjieba/limonp/Colors.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_COLOR_PRINT_HPP #define LIMONP_COLOR_PRINT_HPP @@ -27,21 +9,21 @@ namespace limonp { using std::string; enum Color { - BLACK = 30, - RED, - GREEN, - YELLOW, - BLUE, - PURPLE + BLACK = 30, + RED, + GREEN, + YELLOW, + BLUE, + PURPLE }; // enum Color static void ColorPrintln(enum Color color, const char * fmt, ...) { - va_list ap; - printf("\033[0;%dm", color); - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); - printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly + va_list ap; + printf("\033[0;%dm", color); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly } } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/Condition.hpp b/libchinese-segmentation/cppjieba/limonp/Condition.hpp index c25abbf..656a61d 100644 --- a/libchinese-segmentation/cppjieba/limonp/Condition.hpp +++ b/libchinese-segmentation/cppjieba/limonp/Condition.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_CONDITION_HPP #define LIMONP_CONDITION_HPP @@ -24,31 +6,31 @@ namespace limonp { class Condition : NonCopyable { -public: - explicit Condition(MutexLock& mutex) - : mutex_(mutex) { - XCHECK(!pthread_cond_init(&pcond_, NULL)); - } + public: + explicit Condition(MutexLock& mutex) + : mutex_(mutex) { + XCHECK(!pthread_cond_init(&pcond_, NULL)); + } - ~Condition() { - XCHECK(!pthread_cond_destroy(&pcond_)); - } + ~Condition() { + XCHECK(!pthread_cond_destroy(&pcond_)); + } - void Wait() { - XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex())); - } + void Wait() { + XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex())); + } - void Notify() { - XCHECK(!pthread_cond_signal(&pcond_)); - } + void Notify() { + XCHECK(!pthread_cond_signal(&pcond_)); + } - void NotifyAll() { - XCHECK(!pthread_cond_broadcast(&pcond_)); - } + void NotifyAll() { + XCHECK(!pthread_cond_broadcast(&pcond_)); + } -private: - MutexLock& mutex_; - pthread_cond_t pcond_; + private: + MutexLock& mutex_; + pthread_cond_t pcond_; }; // class Condition } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/Config.hpp b/libchinese-segmentation/cppjieba/limonp/Config.hpp index 101b24e..c98f222 100644 --- a/libchinese-segmentation/cppjieba/limonp/Config.hpp +++ b/libchinese-segmentation/cppjieba/limonp/Config.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ /************************************ * file enc : utf8 * author : wuyanyi09@gmail.com @@ -34,86 +16,86 @@ namespace limonp { using namespace std; class Config { -public: - explicit Config(const string& filePath) { - LoadFile(filePath); - } + public: + explicit Config(const string& filePath) { + LoadFile(filePath); + } - operator bool () { - return !map_.empty(); - } + operator bool () { + return !map_.empty(); + } - string Get(const string& key, const string& defaultvalue) const { - map::const_iterator it = map_.find(key); - if(map_.end() != it) { - return it->second; - } - return defaultvalue; + string Get(const string& key, const string& defaultvalue) const { + map::const_iterator it = map_.find(key); + if(map_.end() != it) { + return it->second; } - int Get(const string& key, int defaultvalue) const { - string str = Get(key, ""); - if("" == str) { - return defaultvalue; - } - return atoi(str.c_str()); + return defaultvalue; + } + int Get(const string& key, int defaultvalue) const { + string str = Get(key, ""); + if("" == str) { + return defaultvalue; } - const char* operator [](const char* key) const { - if(NULL == key) { - return NULL; - } - map::const_iterator it = map_.find(key); - if(map_.end() != it) { - return it->second.c_str(); - } - return NULL; + return atoi(str.c_str()); + } + const char* operator [] (const char* key) const { + if(NULL == key) { + return NULL; } + map::const_iterator it = map_.find(key); + if(map_.end() != it) { + return it->second.c_str(); + } + return NULL; + } - string GetConfigInfo() const { - string res; - res << *this; - return res; + string 
GetConfigInfo() const { + string res; + res << *this; + return res; + } + + private: + void LoadFile(const string& filePath) { + ifstream ifs(filePath.c_str()); + assert(ifs); + string line; + vector vecBuf; + size_t lineno = 0; + while(getline(ifs, line)) { + lineno ++; + Trim(line); + if(line.empty() || StartsWith(line, "#")) { + continue; + } + vecBuf.clear(); + Split(line, vecBuf, "="); + if(2 != vecBuf.size()) { + fprintf(stderr, "line[%s] illegal.\n", line.c_str()); + assert(false); + continue; + } + string& key = vecBuf[0]; + string& value = vecBuf[1]; + Trim(key); + Trim(value); + if(!map_.insert(make_pair(key, value)).second) { + fprintf(stderr, "key[%s] already exits.\n", key.c_str()); + assert(false); + continue; + } } + ifs.close(); + } -private: - void LoadFile(const string& filePath) { - ifstream ifs(filePath.c_str()); - assert(ifs); - string line; - vector vecBuf; - size_t lineno = 0; - while(getline(ifs, line)) { - lineno ++; - Trim(line); - if(line.empty() || StartsWith(line, "#")) { - continue; - } - vecBuf.clear(); - Split(line, vecBuf, "="); - if(2 != vecBuf.size()) { - fprintf(stderr, "line[%s] illegal.\n", line.c_str()); - assert(false); - continue; - } - string& key = vecBuf[0]; - string& value = vecBuf[1]; - Trim(key); - Trim(value); - if(!map_.insert(make_pair(key, value)).second) { - fprintf(stderr, "key[%s] already exits.\n", key.c_str()); - assert(false); - continue; - } - } - ifs.close(); - } + friend ostream& operator << (ostream& os, const Config& config); - friend ostream& operator << (ostream& os, const Config& config); - - map map_; + map map_; }; // class Config inline ostream& operator << (ostream& os, const Config& config) { - return os << config.map_; + return os << config.map_; } } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp index 3cfe5e1..56a478a 100644 --- a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp +++ 
b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_FILELOCK_HPP #define LIMONP_FILELOCK_HPP @@ -33,58 +15,58 @@ namespace limonp { using std::string; class FileLock { -public: - FileLock() : fd_(-1), ok_(true) { + public: + FileLock() : fd_(-1), ok_(true) { + } + ~FileLock() { + if(fd_ > 0) { + Close(); } - ~FileLock() { - if(fd_ > 0) { - Close(); - } + } + void Open(const string& fname) { + assert(fd_ == -1); + fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644); + if(fd_ < 0) { + ok_ = false; + err_ = strerror(errno); } - void Open(const string& fname) { - assert(fd_ == -1); - fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644); - if(fd_ < 0) { - ok_ = false; - err_ = strerror(errno); - } + } + void Close() { + ::close(fd_); + } + void Lock() { + if(LockOrUnlock(fd_, true) < 0) { + ok_ = false; + err_ = strerror(errno); } - void Close() { - ::close(fd_); - } - void Lock() { - if(LockOrUnlock(fd_, true) < 0) { - ok_ = false; - err_ = strerror(errno); - } - } - void UnLock() { - if(LockOrUnlock(fd_, false) < 0) { - ok_ = false; - err_ = strerror(errno); - } - } - bool Ok() const { - return ok_; - } - string Error() const { - return err_; - } -private: - static int LockOrUnlock(int fd, bool lock) { - errno = 0; - struct flock f; - memset(&f, 0, sizeof(f)); 
- f.l_type = (lock ? F_WRLCK : F_UNLCK); - f.l_whence = SEEK_SET; - f.l_start = 0; - f.l_len = 0; // Lock/unlock entire file - return fcntl(fd, F_SETLK, &f); + } + void UnLock() { + if(LockOrUnlock(fd_, false) < 0) { + ok_ = false; + err_ = strerror(errno); } + } + bool Ok() const { + return ok_; + } + string Error() const { + return err_; + } + private: + static int LockOrUnlock(int fd, bool lock) { + errno = 0; + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = (lock ? F_WRLCK : F_UNLCK); + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 0; // Lock/unlock entire file + return fcntl(fd, F_SETLK, &f); + } - int fd_; - bool ok_; - string err_; + int fd_; + bool ok_; + string err_; }; // class FileLock }// namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp index 5198df9..2076682 100644 --- a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp +++ b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ #ifndef LIMONP_FORCE_PUBLIC_H #define LIMONP_FORCE_PUBLIC_H diff --git a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp index 0eba40b..d9b0805 100644 --- a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp +++ b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ #ifndef LIMONP_LOCAL_VECTOR_HPP #define LIMONP_LOCAL_VECTOR_HPP @@ -33,123 +15,126 @@ using namespace std; const size_t LOCAL_VECTOR_BUFFER_SIZE = 16; template class LocalVector { -public: - typedef const T* const_iterator ; - typedef T value_type; - typedef size_t size_type; -private: - T buffer_[LOCAL_VECTOR_BUFFER_SIZE]; - T * ptr_; - size_t size_; - size_t capacity_; -public: - LocalVector() { - init_(); - }; - LocalVector(const LocalVector& vec) { - init_(); - *this = vec; + public: + typedef const T* const_iterator ; + typedef T value_type; + typedef size_t size_type; + private: + T buffer_[LOCAL_VECTOR_BUFFER_SIZE]; + T * ptr_; + size_t size_; + size_t capacity_; + public: + LocalVector() { + init_(); + }; + LocalVector(const LocalVector& vec) { + init_(); + *this = vec; + } + LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster + init_(); + while(begin != end) { + push_back(*begin++); } - LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster - init_(); - while(begin != end) { - push_back(*begin++); - } + } + LocalVector(size_t size, const T& t) { // TODO: make it faster + init_(); + while(size--) { + push_back(t); } - LocalVector(size_t size, const T& t) { // TODO: make it faster - init_(); - while(size--) { - push_back(t); - } + } + ~LocalVector() { + if(ptr_ != buffer_) { + free(ptr_); } - ~LocalVector() { - if(ptr_ != buffer_) { - free(ptr_); - } - }; -public: - LocalVector& operator = (const LocalVector& vec) { - clear(); - size_ = vec.size(); - capacity_ = vec.capacity(); - if(vec.buffer_ == vec.ptr_) { - memcpy(buffer_, vec.buffer_, sizeof(T) * size_); - ptr_ = buffer_; - } else { - ptr_ = (T*) malloc(vec.capacity() * sizeof(T)); - assert(ptr_); - memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T)); - } - return *this; + }; + public: + LocalVector& operator = (const LocalVector& vec) { + if(this == &vec){ + return *this; + } + clear(); + size_ = vec.size(); + capacity_ = vec.capacity(); + 
if(vec.buffer_ == vec.ptr_) { + memcpy(buffer_, vec.buffer_, sizeof(T) * size_); + ptr_ = buffer_; + } else { + ptr_ = (T*) malloc(vec.capacity() * sizeof(T)); + assert(ptr_); + memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T)); } -private: - void init_() { - ptr_ = buffer_; - size_ = 0; - capacity_ = LOCAL_VECTOR_BUFFER_SIZE; + return *this; + } + private: + void init_() { + ptr_ = buffer_; + size_ = 0; + capacity_ = LOCAL_VECTOR_BUFFER_SIZE; + } + public: + T& operator [] (size_t i) { + return ptr_[i]; + } + const T& operator [] (size_t i) const { + return ptr_[i]; + } + void push_back(const T& t) { + if(size_ == capacity_) { + assert(capacity_); + reserve(capacity_ * 2); } -public: - T& operator [](size_t i) { - return ptr_[i]; + ptr_[size_ ++ ] = t; + } + void reserve(size_t size) { + if(size <= capacity_) { + return; } - const T& operator [](size_t i) const { - return ptr_[i]; + T * next = (T*)malloc(sizeof(T) * size); + assert(next); + T * old = ptr_; + ptr_ = next; + memcpy(ptr_, old, sizeof(T) * capacity_); + capacity_ = size; + if(old != buffer_) { + free(old); } - void push_back(const T& t) { - if(size_ == capacity_) { - assert(capacity_); - reserve(capacity_ * 2); - } - ptr_[size_ ++ ] = t; - } - void reserve(size_t size) { - if(size <= capacity_) { - return; - } - T * next = (T*)malloc(sizeof(T) * size); - assert(next); - T * old = ptr_; - ptr_ = next; - memcpy(ptr_, old, sizeof(T) * capacity_); - capacity_ = size; - if(old != buffer_) { - free(old); - } - } - bool empty() const { - return 0 == size(); - } - size_t size() const { - return size_; - } - size_t capacity() const { - return capacity_; - } - const_iterator begin() const { - return ptr_; - } - const_iterator end() const { - return ptr_ + size_; - } - void clear() { - if(ptr_ != buffer_) { - free(ptr_); - } - init_(); + } + bool empty() const { + return 0 == size(); + } + size_t size() const { + return size_; + } + size_t capacity() const { + return capacity_; + } + const_iterator begin() const { 
+ return ptr_; + } + const_iterator end() const { + return ptr_ + size_; + } + void clear() { + if(ptr_ != buffer_) { + free(ptr_); } + init_(); + } }; template ostream & operator << (ostream& os, const LocalVector& vec) { - if(vec.empty()) { - return os << "[]"; - } - os << "[\"" << vec[0]; - for(size_t i = 1; i < vec.size(); i++) { - os << "\", \"" << vec[i]; - } - os << "\"]"; - return os; + if(vec.empty()) { + return os << "[]"; + } + os<<"[\""<. - * - * - */ #ifndef LIMONP_LOGGING_HPP #define LIMONP_LOGGING_HPP @@ -38,55 +20,56 @@ namespace limonp { enum { - LL_DEBUG = 0, - LL_INFO = 1, - LL_WARNING = 2, - LL_ERROR = 3, - LL_FATAL = 4, + LL_DEBUG = 0, + LL_INFO = 1, + LL_WARNING = 2, + LL_ERROR = 3, + LL_FATAL = 4, }; // enum -static const char * LOG_LEVEL_ARRAY[] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"}; -static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"; +static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"}; class Logger { -public: - Logger(size_t level, const char* filename, int lineno) - : level_(level) { + public: + Logger(size_t level, const char* filename, int lineno) + : level_(level) { #ifdef LOGGING_LEVEL - if(level_ < LOGGING_LEVEL) { - return; - } + if (level_ < LOGGING_LEVEL) { + return; + } #endif - assert(level_ <= sizeof(LOG_LEVEL_ARRAY) / sizeof(*LOG_LEVEL_ARRAY)); - char buf[32]; - time_t now; - time(&now); - strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now)); - stream_ << buf - << " " << filename - << ":" << lineno - << " " << LOG_LEVEL_ARRAY[level_] - << " "; - } - ~Logger() { + assert(level_ <= sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY)); + char buf[32]; + time_t now; + time(&now); + struct tm result; + localtime_r(&now, &result); + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result); + stream_ << buf + << " " << filename + << ":" << lineno + << " " << LOG_LEVEL_ARRAY[level_] + << " "; + } + ~Logger() { #ifdef LOGGING_LEVEL - if(level_ < LOGGING_LEVEL) { - return; - } + if 
(level_ < LOGGING_LEVEL) { + return; + } #endif - std::cerr << stream_.str() << std::endl; - if(level_ == LL_FATAL) { - abort(); - } + std::cerr << stream_.str() << std::endl; + if (level_ == LL_FATAL) { + abort(); } + } - std::ostream& Stream() { - return stream_; - } + std::ostream& Stream() { + return stream_; + } -private: - std::ostringstream stream_; - size_t level_; + private: + std::ostringstream stream_; + size_t level_; }; // class Logger } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp index ebe8a07..ea10d6d 100644 --- a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp +++ b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ #ifndef LIMONP_MUTEX_LOCK_HPP #define LIMONP_MUTEX_LOCK_HPP @@ -26,40 +8,40 @@ namespace limonp { class MutexLock: NonCopyable { -public: - MutexLock() { - XCHECK(!pthread_mutex_init(&mutex_, NULL)); - } - ~MutexLock() { - XCHECK(!pthread_mutex_destroy(&mutex_)); - } - pthread_mutex_t* GetPthreadMutex() { - return &mutex_; - } + public: + MutexLock() { + XCHECK(!pthread_mutex_init(&mutex_, NULL)); + } + ~MutexLock() { + XCHECK(!pthread_mutex_destroy(&mutex_)); + } + pthread_mutex_t* GetPthreadMutex() { + return &mutex_; + } -private: - void Lock() { - XCHECK(!pthread_mutex_lock(&mutex_)); - } - void Unlock() { - XCHECK(!pthread_mutex_unlock(&mutex_)); - } - friend class MutexLockGuard; + private: + void Lock() { + XCHECK(!pthread_mutex_lock(&mutex_)); + } + void Unlock() { + XCHECK(!pthread_mutex_unlock(&mutex_)); + } + friend class MutexLockGuard; - pthread_mutex_t mutex_; + pthread_mutex_t mutex_; }; // class MutexLock class MutexLockGuard: NonCopyable { -public: - explicit MutexLockGuard(MutexLock & mutex) - : mutex_(mutex) { - mutex_.Lock(); - } - ~MutexLockGuard() { - mutex_.Unlock(); - } -private: - MutexLock & mutex_; + public: + explicit MutexLockGuard(MutexLock & mutex) + : mutex_(mutex) { + mutex_.Lock(); + } + ~MutexLockGuard() { + mutex_.Unlock(); + } + private: + MutexLock & mutex_; }; // class MutexLockGuard #define MutexLockGuard(x) XCHECK(false); diff --git a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp index b224f2c..145400f 100644 --- a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp +++ b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp @@ -1,35 +1,19 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ +/************************************ + ************************************/ #ifndef LIMONP_NONCOPYABLE_H #define LIMONP_NONCOPYABLE_H namespace limonp { class NonCopyable { -protected: - NonCopyable() { - } - ~NonCopyable() { - } -private: - NonCopyable(const NonCopyable&); - const NonCopyable& operator=(const NonCopyable&); + protected: + NonCopyable() { + } + ~NonCopyable() { + } + private: + NonCopyable(const NonCopyable& ); + const NonCopyable& operator=(const NonCopyable& ); }; // class NonCopyable } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp index 173bcb2..cf00e94 100644 --- a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp +++ b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_STD_EXTEMSION_HPP #define LIMONP_STD_EXTEMSION_HPP @@ -51,123 +33,123 @@ namespace std { template ostream& operator << (ostream& os, const vector& v) { - if(v.empty()) { - return os << "[]"; - } - os << "[" << v[0]; - for(size_t i = 1; i < v.size(); i++) { - os << ", " << v[i]; - } - os << "]"; - return os; + if(v.empty()) { + return os << "[]"; + } + os<<"["< inline ostream& operator << (ostream& os, const vector& v) { - if(v.empty()) { - return os << "[]"; - } - os << "[\"" << v[0]; - for(size_t i = 1; i < v.size(); i++) { - os << "\", \"" << v[i]; - } - os << "\"]"; - return os; + if(v.empty()) { + return os << "[]"; + } + os<<"[\""< ostream& operator << (ostream& os, const deque& dq) { - if(dq.empty()) { - return os << "[]"; - } - os << "[\"" << dq[0]; - for(size_t i = 1; i < dq.size(); i++) { - os << "\", \"" << dq[i]; - } - os << "\"]"; - return os; + if(dq.empty()) { + return os << "[]"; + } + os<<"[\""< ostream& operator << (ostream& os, const pair& pr) { - os << pr.first << ":" << pr.second ; - return os; + os << pr.first << ":" << pr.second ; + return os; } template string& operator << (string& str, const T& obj) { - stringstream ss; - ss << obj; // call ostream& operator << (ostream& os, - return str = ss.str(); + stringstream ss; + ss << obj; // call ostream& operator << (ostream& os, + return str = ss.str(); } template ostream& operator << (ostream& os, const map& mp) { - if(mp.empty()) { - os << "{}"; - return os; - } - os << '{'; - typename map::const_iterator it = mp.begin(); - os << *it; - it++; - while(it != mp.end()) { - os << ", " << *it; - it++; - } - os << '}'; + if(mp.empty()) { + os<<"{}"; return os; + } + os<<'{'; + typename map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) { + os<<", "<<*it; + it++; + } + 
os<<'}'; + return os; } template ostream& operator << (ostream& os, const std::unordered_map& mp) { - if(mp.empty()) { - return os << "{}"; - } - os << '{'; - typename std::unordered_map::const_iterator it = mp.begin(); - os << *it; - it++; - while(it != mp.end()) { - os << ", " << *it++; - } - return os << '}'; + if(mp.empty()) { + return os << "{}"; + } + os<<'{'; + typename std::unordered_map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) { + os<<", "<<*it++; + } + return os<<'}'; } template ostream& operator << (ostream& os, const set& st) { - if(st.empty()) { - os << "{}"; - return os; - } - os << '{'; - typename set::const_iterator it = st.begin(); - os << *it; - it++; - while(it != st.end()) { - os << ", " << *it; - it++; - } - os << '}'; + if(st.empty()) { + os << "{}"; return os; + } + os<<'{'; + typename set::const_iterator it = st.begin(); + os<<*it; + it++; + while(it != st.end()) { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; } template bool IsIn(const ContainType& contain, const KeyType& key) { - return contain.end() != contain.find(key); + return contain.end() != contain.find(key); } template basic_string & operator << (basic_string & s, ifstream & ifs) { - return s.assign((istreambuf_iterator(ifs)), istreambuf_iterator()); + return s.assign((istreambuf_iterator(ifs)), istreambuf_iterator()); } template ofstream & operator << (ofstream & ofs, const basic_string& s) { - ostreambuf_iterator itr(ofs); - copy(s.begin(), s.end(), itr); - return ofs; + ostreambuf_iterator itr (ofs); + copy(s.begin(), s.end(), itr); + return ofs; } } // namespace std diff --git a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp index c785193..c3800af 100644 --- a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp +++ b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp @@ -1,27 +1,14 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ /************************************ * file enc : ascii * author : wuyanyi09@gmail.com ************************************/ #ifndef LIMONP_STR_FUNCTS_H #define LIMONP_STR_FUNCTS_H +#include +#include +#include +#include +#include #include #include #include @@ -29,14 +16,9 @@ #include #include #include -#include -#include -#include -#include #include #include #include -#include #include #include #include "StdExtension.hpp" @@ -44,339 +26,356 @@ namespace limonp { using namespace std; inline string StringFormat(const char* fmt, ...) 
{ - int size = 256; - std::string str; - va_list ap; - while(1) { - str.resize(size); - va_start(ap, fmt); - int n = vsnprintf((char *)str.c_str(), size, fmt, ap); - va_end(ap); - if(n > -1 && n < size) { - str.resize(n); - return str; - } - if(n > -1) - size = n + 1; - else - size *= 2; + int size = 256; + std::string str; + va_list ap; + while (1) { + str.resize(size); + va_start(ap, fmt); + int n = vsnprintf((char *)str.c_str(), size, fmt, ap); + va_end(ap); + if (n > -1 && n < size) { + str.resize(n); + return str; } - return str; + if (n > -1) + size = n + 1; + else + size *= 2; + } + return str; } template void Join(T begin, T end, string& res, const string& connector) { - if(begin == end) { - return; - } - stringstream ss; - ss << *begin; - begin++; - while(begin != end) { - ss << connector << *begin; - begin ++; - } - res = ss.str(); + if(begin == end) { + return; + } + stringstream ss; + ss<<*begin; + begin++; + while(begin != end) { + ss << connector << *begin; + begin ++; + } + res = ss.str(); } template string Join(T begin, T end, const string& connector) { - string res; - Join(begin, end, res, connector); - return res; + string res; + Join(begin ,end, res, connector); + return res; } inline string& Upper(string& str) { - transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); - return str; + transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); + return str; } inline string& Lower(string& str) { - transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); - return str; + transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); + return str; } inline bool IsSpace(unsigned c) { - // when passing large int as the argument of isspace, it core dump, so here need a type cast. - return c > 0xff ? false : std::isspace(c & 0xff) != 0; + // when passing large int as the argument of isspace, it core dump, so here need a type cast. + return c > 0xff ? 
false : std::isspace(c & 0xff); } inline std::string& LTrim(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(IsSpace)))); - return s; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(IsSpace)))); + return s; } inline std::string& RTrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(IsSpace))).base(), s.end()); - return s; + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(IsSpace))).base(), s.end()); + return s; } inline std::string& Trim(std::string &s) { - return LTrim(RTrim(s)); + return LTrim(RTrim(s)); } inline std::string& LTrim(std::string & s, char x) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to(), x)))); - return s; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to(), x)))); + return s; } inline std::string& RTrim(std::string & s, char x) { - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to(), x))).base(), s.end()); - return s; + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to(), x))).base(), s.end()); + return s; } inline std::string& Trim(std::string &s, char x) { - return LTrim(RTrim(s, x), x); + return LTrim(RTrim(s, x), x); } inline void Split(const string& src, vector& res, const string& pattern, size_t maxsplit = string::npos) { - res.clear(); - size_t Start = 0; - size_t end = 0; - string sub; - while(Start < src.size()) { - end = src.find_first_of(pattern, Start); - if(string::npos == end || res.size() >= maxsplit) { - sub = src.substr(Start); - res.push_back(sub); - return; - } - sub = src.substr(Start, end - Start); - res.push_back(sub); - Start = end + 1; + res.clear(); + size_t Start = 0; + size_t end = 0; + string sub; + while(Start < src.size()) { + end = src.find_first_of(pattern, Start); + if(string::npos == end || res.size() >= maxsplit) { + sub = 
src.substr(Start); + res.push_back(sub); + return; } - return; + sub = src.substr(Start, end - Start); + res.push_back(sub); + Start = end + 1; + } + return; } inline vector Split(const string& src, const string& pattern, size_t maxsplit = string::npos) { - vector res; - Split(src, res, pattern, maxsplit); - return res; + vector res; + Split(src, res, pattern, maxsplit); + return res; } inline bool StartsWith(const string& str, const string& prefix) { - if(prefix.length() > str.length()) { - return false; - } - return 0 == str.compare(0, prefix.length(), prefix); + if(prefix.length() > str.length()) { + return false; + } + return 0 == str.compare(0, prefix.length(), prefix); } inline bool EndsWith(const string& str, const string& suffix) { - if(suffix.length() > str.length()) { - return false; - } - return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix); + if(suffix.length() > str.length()) { + return false; + } + return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix); } inline bool IsInStr(const string& str, char ch) { - return str.find(ch) != string::npos; + return str.find(ch) != string::npos; } inline uint16_t TwocharToUint16(char high, char low) { - return (((uint16_t(high) & 0x00ff) << 8) | (uint16_t(low) & 0x00ff)); + return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff)); } template bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) { - if(!str) { - return false; + if(!str) { + return false; + } + char ch1, ch2; + uint16_t tmp; + vec.clear(); + for(size_t i = 0; i < len;) { + if(!(str[i] & 0x80)) { // 0xxxxxxx + vec.push_back(str[i]); + i++; + } else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx + ch1 = (str[i] >> 2) & 0x07; + ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 ); + tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff)); + vec.push_back(tmp); + i += 2; + } else if((uint8_t)str[i] <= 0xef && i + 2 < len) { + ch1 = 
((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f ); + ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f); + tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff)); + vec.push_back(tmp); + i += 3; + } else { + return false; } - char ch1, ch2; - uint16_t tmp; - vec.clear(); - for(size_t i = 0; i < len;) { - if(!(str[i] & 0x80)) { // 0xxxxxxx - vec.push_back(str[i]); - i++; - } else if((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx - ch1 = (str[i] >> 2) & 0x07; - ch2 = (str[i + 1] & 0x3f) | ((str[i] & 0x03) << 6); - tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff)); - vec.push_back(tmp); - i += 2; - } else if((uint8_t)str[i] <= 0xef && i + 2 < len) { - ch1 = ((uint8_t)str[i] << 4) | ((str[i + 1] >> 2) & 0x0f); - ch2 = (((uint8_t)str[i + 1] << 6) & 0xc0) | (str[i + 2] & 0x3f); - tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff)); - vec.push_back(tmp); - i += 3; - } else { - return false; - } - } - return true; + } + return true; } template bool Utf8ToUnicode(const string& str, Uint16Container& vec) { - return Utf8ToUnicode(str.c_str(), str.size(), vec); + return Utf8ToUnicode(str.c_str(), str.size(), vec); +} + +template +bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) { + uint32_t tmp; + vec.clear(); + for(size_t i = 0; i < size;) { + if(!(str[i] & 0x80)) { // 0xxxxxxx + // 7bit, total 7bit + tmp = (uint8_t)(str[i]) & 0x7f; + i++; + } else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxxx + // 5bit, total 5bit + tmp = (uint8_t)(str[i]) & 0x1f; + + // 6bit, total 11bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+1]) & 0x3f; + i += 2; + } else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxxxx + // 4bit, total 4bit + tmp = (uint8_t)(str[i]) & 0x0f; + + // 6bit, total 10bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+1]) & 0x3f; + + // 6bit, total 16bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+2]) & 0x3f; + + i += 3; + } else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 
11110xxxx + // 3bit, total 3bit + tmp = (uint8_t)(str[i]) & 0x07; + + // 6bit, total 9bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+1]) & 0x3f; + + // 6bit, total 15bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+2]) & 0x3f; + + // 6bit, total 21bit + tmp <<= 6; + tmp |= (uint8_t)(str[i+3]) & 0x3f; + + i += 4; + } else { + return false; + } + vec.push_back(tmp); + } + return true; } template bool Utf8ToUnicode32(const string& str, Uint32Container& vec) { - uint32_t tmp; - vec.clear(); - for(size_t i = 0; i < str.size();) { - if(!(str[i] & 0x80)) { // 0xxxxxxx - // 7bit, total 7bit - tmp = (uint8_t)(str[i]) & 0x7f; - i++; - } else if((uint8_t)str[i] <= 0xdf && i + 1 < str.size()) { // 110xxxxxx - // 5bit, total 5bit - tmp = (uint8_t)(str[i]) & 0x1f; + return Utf8ToUnicode32(str.data(), str.size(), vec); +} - // 6bit, total 11bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 1]) & 0x3f; - i += 2; - } else if((uint8_t)str[i] <= 0xef && i + 2 < str.size()) { // 1110xxxxxx - // 4bit, total 4bit - tmp = (uint8_t)(str[i]) & 0x0f; - - // 6bit, total 10bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 1]) & 0x3f; - - // 6bit, total 16bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 2]) & 0x3f; - - i += 3; - } else if((uint8_t)str[i] <= 0xf7 && i + 3 < str.size()) { // 11110xxxx - // 3bit, total 3bit - tmp = (uint8_t)(str[i]) & 0x07; - - // 6bit, total 9bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 1]) & 0x3f; - - // 6bit, total 15bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 2]) & 0x3f; - - // 6bit, total 21bit - tmp <<= 6; - tmp |= (uint8_t)(str[i + 3]) & 0x3f; - - i += 4; - } else { - return false; - } - vec.push_back(tmp); +inline int UnicodeToUtf8Bytes(uint32_t ui){ + if(ui <= 0x7f) { + return 1; + } else if(ui <= 0x7ff) { + return 2; + } else if(ui <= 0xffff) { + return 3; + } else { + return 4; } - return true; } template void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) { - res.clear(); - uint32_t ui; - while(begin != end) { - ui = *begin; - if(ui <= 0x7f) 
{ - res += char(ui); - } else if(ui <= 0x7ff) { - res += char(((ui >> 6) & 0x1f) | 0xc0); - res += char((ui & 0x3f) | 0x80); - } else if(ui <= 0xffff) { - res += char(((ui >> 12) & 0x0f) | 0xe0); - res += char(((ui >> 6) & 0x3f) | 0x80); - res += char((ui & 0x3f) | 0x80); - } else { - res += char(((ui >> 18) & 0x03) | 0xf0); - res += char(((ui >> 12) & 0x3f) | 0x80); - res += char(((ui >> 6) & 0x3f) | 0x80); - res += char((ui & 0x3f) | 0x80); - } - begin ++; + res.clear(); + uint32_t ui; + while(begin != end) { + ui = *begin; + if(ui <= 0x7f) { + res += char(ui); + } else if(ui <= 0x7ff) { + res += char(((ui >> 6) & 0x1f) | 0xc0); + res += char((ui & 0x3f) | 0x80); + } else if(ui <= 0xffff) { + res += char(((ui >> 12) & 0x0f) | 0xe0); + res += char(((ui >> 6) & 0x3f) | 0x80); + res += char((ui & 0x3f) | 0x80); + } else { + res += char(((ui >> 18) & 0x03) | 0xf0); + res += char(((ui >> 12) & 0x3f) | 0x80); + res += char(((ui >> 6) & 0x3f) | 0x80); + res += char((ui & 0x3f) | 0x80); } + begin ++; + } } template void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) { - res.clear(); - uint16_t ui; - while(begin != end) { - ui = *begin; - if(ui <= 0x7f) { - res += char(ui); - } else if(ui <= 0x7ff) { - res += char(((ui >> 6) & 0x1f) | 0xc0); - res += char((ui & 0x3f) | 0x80); - } else { - res += char(((ui >> 12) & 0x0f) | 0xe0); - res += char(((ui >> 6) & 0x3f) | 0x80); - res += char((ui & 0x3f) | 0x80); - } - begin ++; + res.clear(); + uint16_t ui; + while(begin != end) { + ui = *begin; + if(ui <= 0x7f) { + res += char(ui); + } else if(ui <= 0x7ff) { + res += char(((ui>>6) & 0x1f) | 0xc0); + res += char((ui & 0x3f) | 0x80); + } else { + res += char(((ui >> 12) & 0x0f )| 0xe0); + res += char(((ui>>6) & 0x3f )| 0x80 ); + res += char((ui & 0x3f) | 0x80); } + begin ++; + } } template bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) { - vec.clear(); - if(!str) { - return true; - } - size_t i = 0; - while(i < len) { - 
if(0 == (str[i] & 0x80)) { - vec.push_back(uint16_t(str[i])); - i++; - } else { - if(i + 1 < len) { //&& (str[i+1] & 0x80)) - uint16_t tmp = (((uint16_t(str[i]) & 0x00ff) << 8) | (uint16_t(str[i + 1]) & 0x00ff)); - vec.push_back(tmp); - i += 2; - } else { - return false; - } - } - } + vec.clear(); + if(!str) { return true; + } + size_t i = 0; + while(i < len) { + if(0 == (str[i] & 0x80)) { + vec.push_back(uint16_t(str[i])); + i++; + } else { + if(i + 1 < len) { //&& (str[i+1] & 0x80)) + uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff)); + vec.push_back(tmp); + i += 2; + } else { + return false; + } + } + } + return true; } template bool GBKTrans(const string& str, Uint16Container& vec) { - return GBKTrans(str.c_str(), str.size(), vec); + return GBKTrans(str.c_str(), str.size(), vec); } template void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) { - res.clear(); - //pair pa; - char first, second; - while(begin != end) { - //pa = uint16ToChar2(*begin); - first = ((*begin) >> 8) & 0x00ff; - second = (*begin) & 0x00ff; - if(first & 0x80) { - res += first; - res += second; - } else { - res += second; - } - begin++; + res.clear(); + //pair pa; + char first, second; + while(begin != end) { + //pa = uint16ToChar2(*begin); + first = ((*begin)>>8) & 0x00ff; + second = (*begin) & 0x00ff; + if(first & 0x80) { + res += first; + res += second; + } else { + res += second; } + begin++; + } } /* * format example: "%Y-%m-%d %H:%M:%S" */ -inline void GetTime(const string& format, string& timeStr) { - time_t timeNow; - time(&timeNow); - timeStr.resize(64); - size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow)); - timeStr.resize(len); -} +// inline void GetTime(const string& format, string& timeStr) { +// time_t timeNow; +// time(&timeNow); +// timeStr.resize(64); +// size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow)); +// 
timeStr.resize(len); +// } inline string PathJoin(const string& path1, const string& path2) { - if(EndsWith(path1, "/")) { - return path1 + path2; - } - return path1 + "/" + path2; + if(EndsWith(path1, "/")) { + return path1 + path2; + } + return path1 + "/" + path2; } } diff --git a/libchinese-segmentation/cppjieba/limonp/Thread.hpp b/libchinese-segmentation/cppjieba/limonp/Thread.hpp index 4db0a3c..4e3c084 100644 --- a/libchinese-segmentation/cppjieba/limonp/Thread.hpp +++ b/libchinese-segmentation/cppjieba/limonp/Thread.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - * - */ #ifndef LIMONP_THREAD_HPP #define LIMONP_THREAD_HPP @@ -25,36 +7,36 @@ namespace limonp { class IThread: NonCopyable { -public: - IThread(): isStarted(false), isJoined(false) { + public: + IThread(): isStarted(false), isJoined(false) { + } + virtual ~IThread() { + if(isStarted && !isJoined) { + XCHECK(!pthread_detach(thread_)); } - virtual ~IThread() { - if(isStarted && !isJoined) { - XCHECK(!pthread_detach(thread_)); - } - }; + }; - virtual void Run() = 0; - void Start() { - XCHECK(!isStarted); - XCHECK(!pthread_create(&thread_, NULL, Worker, this)); - isStarted = true; - } - void Join() { - XCHECK(!isJoined); - XCHECK(!pthread_join(thread_, NULL)); - isJoined = true; - } -private: - static void * Worker(void * data) { - IThread * ptr = (IThread*) data; - ptr->Run(); - return NULL; - } + virtual void Run() = 0; + void Start() { + XCHECK(!isStarted); + XCHECK(!pthread_create(&thread_, NULL, Worker, this)); + isStarted = true; + } + void Join() { + XCHECK(!isJoined); + XCHECK(!pthread_join(thread_, NULL)); + isJoined = true; + } + private: + static void * Worker(void * data) { + IThread * ptr = (IThread* ) data; + ptr->Run(); + return NULL; + } - pthread_t thread_; - bool isStarted; - bool isJoined; + pthread_t thread_; + bool isStarted; + bool isJoined; }; // class IThread } // namespace limonp diff --git a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp index 47a5c97..fb0ee57 100644 --- a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp +++ b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp @@ -1,21 +1,3 @@ -/* - * Copyright (C) 2020, KylinSoft Co., Ltd. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - */ #ifndef LIMONP_THREAD_POOL_HPP #define LIMONP_THREAD_POOL_HPP @@ -30,73 +12,73 @@ using namespace std; //class ThreadPool; class ThreadPool: NonCopyable { -public: - class Worker: public IThread { - public: - Worker(ThreadPool* pool): ptThreadPool_(pool) { - assert(ptThreadPool_); - } - virtual ~Worker() { - } - - virtual void Run() { - while(true) { - ClosureInterface* closure = ptThreadPool_->queue_.Pop(); - if(closure == NULL) { - break; - } - try { - closure->Run(); - } catch(std::exception& e) { - XLOG(ERROR) << e.what(); - } catch(...) { - XLOG(ERROR) << " unknown exception."; - } - delete closure; - } - } - private: - ThreadPool * ptThreadPool_; - }; // class Worker - - ThreadPool(size_t thread_num) - : threads_(thread_num), - queue_(thread_num) { - assert(thread_num); - for(size_t i = 0; i < threads_.size(); i ++) { - threads_[i] = new Worker(this); - } + public: + class Worker: public IThread { + public: + Worker(ThreadPool* pool): ptThreadPool_(pool) { + assert(ptThreadPool_); } - ~ThreadPool() { - Stop(); + virtual ~Worker() { } - void Start() { - for(size_t i = 0; i < threads_.size(); i++) { - threads_[i]->Start(); + virtual void Run() { + while (true) { + ClosureInterface* closure = ptThreadPool_->queue_.Pop(); + if (closure == NULL) { + break; } - } - void Stop() { - for(size_t i = 0; i < threads_.size(); i ++) { - queue_.Push(NULL); + try { + closure->Run(); + } catch(std::exception& e) { + XLOG(ERROR) << e.what(); + } catch(...) 
{ + XLOG(ERROR) << " unknown exception."; } - for(size_t i = 0; i < threads_.size(); i ++) { - threads_[i]->Join(); - delete threads_[i]; - } - threads_.clear(); + delete closure; + } } + private: + ThreadPool * ptThreadPool_; + }; // class Worker - void Add(ClosureInterface* task) { - assert(task); - queue_.Push(task); + ThreadPool(size_t thread_num) + : threads_(thread_num), + queue_(thread_num) { + assert(thread_num); + for(size_t i = 0; i < threads_.size(); i ++) { + threads_[i] = new Worker(this); } + } + ~ThreadPool() { + Stop(); + } -private: - friend class Worker; + void Start() { + for(size_t i = 0; i < threads_.size(); i++) { + threads_[i]->Start(); + } + } + void Stop() { + for(size_t i = 0; i < threads_.size(); i ++) { + queue_.Push(NULL); + } + for(size_t i = 0; i < threads_.size(); i ++) { + threads_[i]->Join(); + delete threads_[i]; + } + threads_.clear(); + } - vector threads_; - BoundedBlockingQueue queue_; + void Add(ClosureInterface* task) { + assert(task); + queue_.Push(task); + } + + private: + friend class Worker; + + vector threads_; + BoundedBlockingQueue queue_; }; // class ThreadPool } // namespace limonp diff --git a/libchinese-segmentation/libchinese-segmentation.pro b/libchinese-segmentation/libchinese-segmentation.pro index 583f794..28fb1a1 100644 --- a/libchinese-segmentation/libchinese-segmentation.pro +++ b/libchinese-segmentation/libchinese-segmentation.pro @@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 include(cppjieba/cppjieba.pri) +#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc + SOURCES += \ chinese-segmentation.cpp \ diff --git a/libsearch/appsearch/app-match.cpp b/libsearch/appsearch/app-match.cpp index 0e0269e..7d9d030 100644 --- a/libsearch/appsearch/app-match.cpp +++ b/libsearch/appsearch/app-match.cpp @@ -46,6 +46,7 @@ AppMatch::AppMatch(QObject *parent) : QThread(parent) if(!m_interFace->isValid()) { qWarning() << 
qPrintable(QDBusConnection::sessionBus().lastError().message()); } + m_interFace->setTimeout(200); qDebug() << "AppMatch is new"; } @@ -227,10 +228,11 @@ void AppMatch::getDesktopFilePath() { } void AppMatch::getAppName(QMap &installed) { - QMap::const_iterator i; - for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { - appNameMatch(i.key().app_name, installed); - } +// QMap::const_iterator i; +// for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { +// appNameMatch(i.key().app_name, installed); +// } + appNameMatch(installed); qDebug() << "installed app match is successful!"; } @@ -275,12 +277,44 @@ void AppMatch::appNameMatch(QString appname, QMap &inst } } } +void AppMatch::appNameMatch(QMap &installed) { + QStringList list; + NameString name; + QMapIterator iter(m_installAppMap); + while(iter.hasNext()) { + iter.next(); + list = iter.value(); + name.app_name = iter.key().app_name; + if(iter.key().app_name.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + continue; + } + + QStringList pinyinlist; + pinyinlist = FileUtils::findMultiToneWords(iter.key().app_name); + + for(int i = 0; i < pinyinlist.size() / 2; i++) { + QString shouzimu = pinyinlist.at(2 * i + 1); // 中文转首字母 + if(shouzimu.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + break; + } + if(m_sourceText.size() < 2) + break; + QString pinyin = pinyinlist.at(2 * i); // 中文转拼音 + if(pinyin.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + break; + } + } + } +} void AppMatch::softWareCenterSearch(QMap &softwarereturn) { - if(m_interFace->timeout() != -1) { - qWarning() << "softWareCente Dbus is timeout !"; - return; - } +// if(m_interFace->timeout() != -1) { +// qWarning() << "softWareCente Dbus is timeout !"; +// return; +// } slotDBusCallFinished(softwarereturn); qDebug() << "softWareCenter match app is successful!"; } @@ -349,7 +383,7 @@ void AppMatch::run() { QDir 
androidPath(QDir::homePath() + "/.local/share/applications/"); if(androidPath.exists()) this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/"); - connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) { + connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) { this->getDesktopFilePath(); if(path == "/usr/share/applications/") { this->getAllDesktopFilePath("/usr/share/applications/"); diff --git a/libsearch/appsearch/app-match.h b/libsearch/appsearch/app-match.h index 253992e..6f51d15 100644 --- a/libsearch/appsearch/app-match.h +++ b/libsearch/appsearch/app-match.h @@ -65,6 +65,7 @@ private: void getAppName(QMap &installed); // void appNameMatch(QString appname,QString desktoppath,QString appicon); void appNameMatch(QString appname, QMap &installed); + void appNameMatch(QMap &installed); void softWareCenterSearch(QMap &softwarereturn); diff --git a/libsearch/appsearch/app-search-plugin.cpp b/libsearch/appsearch/app-search-plugin.cpp new file mode 100644 index 0000000..b2ec215 --- /dev/null +++ b/libsearch/appsearch/app-search-plugin.cpp @@ -0,0 +1,219 @@ +#include "app-search-plugin.h" +#include +#include +#include +using namespace Zeeker; +size_t AppSearchPlugin::uniqueSymbol = 0; +QMutex AppSearchPlugin::m_mutex; +AppSearchPlugin::AppSearchPlugin(QObject *parent) : QObject(parent) +{ + SearchPluginIface::Actioninfo open { 0, tr("Open")}; + SearchPluginIface::Actioninfo addtoDesktop { 1, tr("Add Shortcut to Desktop")}; + SearchPluginIface::Actioninfo addtoPanel { 2, tr("Add Shortcut to Panel")}; + SearchPluginIface::Actioninfo install { 0, tr("Install")}; + m_actionInfo_installed << open << addtoDesktop << addtoPanel; + m_actionInfo_not_installed << install; + AppMatch::getAppMatch()->start(); + m_pool.setMaxThreadCount(2); + m_pool.setExpiryTimeout(1000); +} + +const QString AppSearchPlugin::name() +{ + return tr("Applications Search"); +} + +const QString 
AppSearchPlugin::description() +{ + return tr("Applications Search"); +} + +QString AppSearchPlugin::getPluginName() +{ + return tr("Applications Search"); +} + +void AppSearchPlugin::KeywordSearch(QString keyword, DataQueue *searchResult) +{ + m_mutex.lock(); + ++uniqueSymbol; + m_mutex.unlock(); + AppSearch *appsearch = new AppSearch(searchResult, keyword, uniqueSymbol); + m_pool.start(appsearch); +} + +QList AppSearchPlugin::getActioninfo(int type) +{ + switch (type) { + case 0: + return m_actionInfo_installed; + break; + case 1: + return m_actionInfo_not_installed; + break; + default: + return QList(); + break; + } +} + +void AppSearchPlugin::openAction(int actionkey, QString key, int type) +{ + switch (type) { + case 0: + switch (actionkey) { + case 0: + if(!launch(key)) { + qWarning() << "Fail to launch:" << key; + } + break; + case 1: + if(!addDesktopShortcut(key)) { + qWarning() << "Fail to add Desktop Shortcut:" << key; + } + break; + case 2: + if(!addPanelShortcut(key)) { + qWarning() << "Fail to add Panel Shortcut:" << key; + } + break; + default: + break; + } + break; + case 1: + if(!installAppAction(key)) { + qWarning() << "Fail to install:" << key; + } + break; + default: + break; + } +} + +bool AppSearchPlugin::isPreviewEnable(QString key, int type) +{ + return false; +} + +QWidget *AppSearchPlugin::previewPage(QString key, int type, QWidget *parent = nullptr) +{ + return nullptr; +} + +bool AppSearchPlugin::launch(const QString &path) +{ + GDesktopAppInfo * desktopAppInfo = g_desktop_app_info_new_from_filename(path.toLocal8Bit().data()); + bool res = static_cast(g_app_info_launch(G_APP_INFO(desktopAppInfo), nullptr, nullptr, nullptr)); + g_object_unref(desktopAppInfo); + return res; +} +bool AppSearchPlugin::addPanelShortcut(const QString& path) { + QDBusInterface iface("com.ukui.panel.desktop", + "/", + "com.ukui.panel.desktop", + QDBusConnection::sessionBus()); + if(iface.isValid()) { + QDBusReply isExist = iface.call("CheckIfExist", path); + 
if(isExist) { + qWarning() << "Add shortcut to panel failed, because it is already existed!"; + return false; + } + QDBusReply ret = iface.call("AddToTaskbar", path); + qDebug() << "Add shortcut to panel successed!"; + return true; + } + return false; +} + +bool AppSearchPlugin::addDesktopShortcut(const QString& path) { + QString dirpath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation); + QFileInfo fileInfo(path); + QString desktopfn = fileInfo.fileName(); + QFile file(path); + QString newName = QString(dirpath + "/" + desktopfn); + bool ret = file.copy(QString(dirpath + "/" + desktopfn)); + if(ret) { + QProcess process; + process.startDetached(QString("chmod a+x %1").arg(newName)); + return true; + } + return false; +} + +bool AppSearchPlugin::installAppAction(const QString & name) { + QDBusInterface * interface = new QDBusInterface("com.kylin.softwarecenter", + "/com/kylin/softwarecenter", + "com.kylin.utiliface", + QDBusConnection::sessionBus()); + + if(interface->isValid()) { + //软件商店已打开,直接跳转 + interface->call("show_search_result", name); + bool reply = QDBusReply(interface->call(QString("show_search_result"), name)); + return reply; + } else { + //软件商店未打开,打开软件商店下载此软件 + qDebug() << "Softwarecenter has not been launched, now launch it." << name; + QProcess process; + return process.startDetached(QString("kylin-software-center -find %1").arg(name)); + } +} + +AppSearch::AppSearch(DataQueue *searchResult, const QString &keyword, size_t uniqueSymbol) +{ + this->setAutoDelete(true); + m_search_result = searchResult; + m_keyword = keyword; + m_uniqueSymbol = uniqueSymbol; +} + +AppSearch::~AppSearch() +{ +} + +void AppSearch::run() +{ + //These weird code is mean to be compatible with the old version UI. 
+ AppMatch::getAppMatch()->startMatchApp(m_keyword, m_installed_apps, m_not_installed_apps); + QMapIterator i(m_installed_apps); + while (i.hasNext()) { + i.next(); + SearchPluginIface::ResultInfo ri; + if(!QIcon::fromTheme(i.value().at(1)).isNull()) { + ri.icon = QIcon::fromTheme(i.value().at(1)); + }else { + ri.icon = QIcon(":/res/icons/desktop.png"); + } + ri.name = i.key().app_name; + ri.actionKey = i.value().at(0); + ri.type = 0; //0 means installed apps. + if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) { + m_search_result->enqueue(ri); + } else { + break; + } + } + QMapIterator in(m_not_installed_apps); + while (in.hasNext()) { + in.next(); + SearchPluginIface::ResultInfo ri; + if(!QIcon(in.value().at(1)).isNull()) { + ri.icon = QIcon(in.value().at(1)); + }else { + ri.icon = QIcon(":/res/icons/desktop.png"); + } + ri.name = in.key().app_name; + SearchPluginIface::DescriptionInfo di; + di.key = QString(tr("Application Description:")); + di.value = in.value().at(3); + ri.description.append(di); + ri.actionKey = in.value().at(2); + ri.type = 1; //1 means not installed apps. 
+ if (m_uniqueSymbol == AppSearchPlugin::uniqueSymbol) { + m_search_result->enqueue(ri); + } else { + break; + } + } +} diff --git a/libsearch/appsearch/app-search-plugin.h b/libsearch/appsearch/app-search-plugin.h new file mode 100644 index 0000000..01cf4e1 --- /dev/null +++ b/libsearch/appsearch/app-search-plugin.h @@ -0,0 +1,57 @@ +#ifndef APPSEARCHPLUGIN_H +#define APPSEARCHPLUGIN_H + +#include +#include "search-plugin-iface.h" +#include "app-match.h" +#include "libsearch_global.h" +namespace Zeeker { +class LIBSEARCH_EXPORT AppSearchPlugin : public QObject, public SearchPluginIface +{ + friend class AppSearch; + Q_OBJECT +public: + AppSearchPlugin(QObject *parent = nullptr); + PluginType pluginType() {return PluginType::SearchPlugin;} + const QString name(); + const QString description(); + const QIcon icon() {return QIcon::fromTheme("appsearch");} + void setEnable(bool enable) {m_enable = enable;} + bool isEnable() {return m_enable;} + QString getPluginName(); + + void KeywordSearch(QString keyword,DataQueue *searchResult); + QList getActioninfo(int type); + void openAction(int actionkey, QString key, int type); + bool isPreviewEnable(QString key, int type); + QWidget *previewPage(QString key, int type, QWidget *parent); +private: + bool launch(const QString &path); + bool addPanelShortcut(const QString &path); + bool addDesktopShortcut(const QString &path); + bool installAppAction(const QString &name); + bool m_enable = true; + QList m_actionInfo_installed; + QList m_actionInfo_not_installed; + QThreadPool m_pool; + static size_t uniqueSymbol; + static QMutex m_mutex; +}; + +class AppSearch : public QObject, public QRunnable { + Q_OBJECT +public: + AppSearch(DataQueue *searchResult, const QString& keyword, size_t uniqueSymbol); + ~AppSearch(); +protected: + void run() override; +private: + DataQueue *m_search_result = nullptr; + size_t m_uniqueSymbol; + QString m_keyword; + QMap m_installed_apps; + QMap m_not_installed_apps; +}; +} + +#endif // 
APPSEARCHPLUGIN_H diff --git a/libsearch/appsearch/appsearch.pri b/libsearch/appsearch/appsearch.pri index 4a3d55d..c222506 100644 --- a/libsearch/appsearch/appsearch.pri +++ b/libsearch/appsearch/appsearch.pri @@ -2,6 +2,8 @@ INCLUDEPATH += $$PWD HEADERS += \ $$PWD/app-match.h \ + $$PWD/app-search-plugin.h SOURCES += \ $$PWD/app-match.cpp \ + $$PWD/app-search-plugin.cpp diff --git a/libsearch/common.h b/libsearch/common.h index d3c238c..f3e2c41 100644 --- a/libsearch/common.h +++ b/libsearch/common.h @@ -1,7 +1,25 @@ #ifndef COMMON_H #define COMMON_H - +#include #define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData() +#define FILE_SEARCH_VALUE "0" +#define DIR_SEARCH_VALUE "1" +#define HOME_PATH QDir::homePath() +static const QMap targetFileTypeMap = { + std::map::value_type("doc", true), + std::map::value_type("docx", true), + std::map::value_type("ppt", true), + std::map::value_type("pptx", true), + std::map::value_type("xls", true), + std::map::value_type("xlsx", true), + std::map::value_type("txt", true), + std::map::value_type("dot", true), + std::map::value_type("wps", true), + std::map::value_type("pps", true), + std::map::value_type("dps", true), + std::map::value_type("et", true), + std::map::value_type("pdf", true) +}; //TODO Put things that needed to be put here here. 
#endif // COMMON_H diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 5d4d3f1..d640a79 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -20,6 +20,7 @@ * */ #include "file-utils.h" +#include using namespace Zeeker; size_t FileUtils::_max_index_count = 0; @@ -177,6 +178,22 @@ QString FileUtils::getSettingName(const QString& setting) { return setting.right(setting.length() - setting.lastIndexOf("/") - 1); } +bool FileUtils::isOrUnder(QString pathA, QString pathB) +{ + if(!pathA.startsWith(QLatin1Char('/'))) //safe for empty paths, unlike pathA[0] + pathA.prepend("/"); + if(!pathB.startsWith(QLatin1Char('/'))) //safe for empty paths, unlike pathB[0] + pathB.prepend("/"); + + if(pathA.length() < pathB.length()) + return false; + + if(pathA == pathB || pathA.startsWith(pathB + "/")) + return true; + + return false; +} + void FileUtils::loadHanziTable(const QString &fileName) { QFile file(fileName); @@ -482,12 +499,30 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { if(!file.open(QuaZip::mdUnzip)) return; - if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) + if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) { + file.close(); return; + } QuaZipFile fileR(&file); fileR.open(QIODevice::ReadOnly); //读取方式打开 + QXmlStreamReader reader(&fileR); + + while (!reader.atEnd()){ + if(reader.readNextStartElement() and reader.name().toString() == "t"){ + textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " ")); + if(textcontent.length() >= MAX_CONTENT_LENGTH/3){ + break; + } + } + } + + fileR.close(); + file.close(); + return; + +/* //原加载DOM文档方式; QDomDocument doc; doc.setContent(fileR.readAll()); fileR.close(); @@ -499,7 +534,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { QDomElement wr = wp.firstChildElement("w:r"); while(!wr.isNull()) { QDomElement wt = wr.firstChildElement("w:t"); - textcontent.append(wt.text().replace("\n", "")); + textcontent.append(wt.text().replace("\n", "")).replace("\r", " "); if(textcontent.length() >= 
MAX_CONTENT_LENGTH / 3) { file.close(); return; @@ -512,6 +547,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { } file.close(); return; +*/ } void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { @@ -527,8 +563,35 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { if(i.startsWith(prefix)) fileList << i; } - if(fileList.isEmpty()) + if(fileList.isEmpty()) { + file.close(); return; + } + + for(int i = 0; i < fileList.size(); ++i){ + QString name = prefix + QString::number(i + 1) + ".xml"; + if(!file.setCurrentFile(name)) { + continue; + } + QuaZipFile fileR(&file); + fileR.open(QIODevice::ReadOnly); + + QXmlStreamReader reader(&fileR); + + while (!reader.atEnd()){ + if(reader.readNextStartElement() and reader.name().toString() == "t"){ + textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " ")); + if(textcontent.length() >= MAX_CONTENT_LENGTH/3){ + break; + } + } + } + fileR.close(); + } + file.close(); + return; + +/* QDomElement sptree; QDomElement sp; QDomElement txbody; @@ -596,6 +659,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { } file.close(); return; +*/ } void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { @@ -606,12 +670,30 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { if(!file.open(QuaZip::mdUnzip)) return; - if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) + if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) { + file.close(); return; + } QuaZipFile fileR(&file); - fileR.open(QIODevice::ReadOnly); //读取方式打开 + fileR.open(QIODevice::ReadOnly); + QXmlStreamReader reader(&fileR); + + while (!reader.atEnd()){ + if(reader.readNextStartElement() and reader.name().toString() == "t"){ + textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " ")); + if(textcontent.length() >= MAX_CONTENT_LENGTH/3){ + break; + } + } + } + + 
fileR.close(); + file.close(); + return; + +/* QDomDocument doc; doc.setContent(fileR.readAll()); fileR.close(); @@ -641,16 +723,19 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { } file.close(); return; +*/ } void FileUtils::getPdfTextContent(QString &path, QString &textcontent) { Poppler::Document *doc = Poppler::Document::load(path); - if(doc->isLocked()) + if(!doc || doc->isLocked()) { //load() yields nullptr when the file cannot be parsed + delete doc; return; + } const QRectF qf; int pageNum = doc->numPages(); for(int i = 0; i < pageNum; ++i) { - textcontent.append(doc->page(i)->text(qf).replace("\n", "")); + textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " ")); if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) break; } @@ -679,7 +764,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) { stream.setCodec(codec); uchardet_delete(chardet); - textcontent = stream.readAll().replace("\n", ""); + textcontent = stream.readAll().replace("\n", "").replace("\r", " "); file.close(); encodedString.clear(); @@ -688,3 +773,18 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) { return; } + +bool FileUtils::openFile(QString &path, bool openInDir) +{ + if(openInDir) { + return QDesktopServices::openUrl(QUrl::fromLocalFile(path.left(path.lastIndexOf("/")))); + } else { + return QDesktopServices::openUrl(QUrl::fromLocalFile(path)); + } +} + +bool FileUtils::copyPath(QString &path) +{ + QApplication::clipboard()->setText(path); + return true; +} diff --git a/libsearch/file-utils.h b/libsearch/file-utils.h index a352d63..c718f53 100644 --- a/libsearch/file-utils.h +++ b/libsearch/file-utils.h @@ -35,6 +35,9 @@ #include #include #include +#include +#include +#include #include #include @@ -67,6 +70,8 @@ public: static QString getFileName(const QString &); static QString getAppName(const QString &); static QString getSettingName(const QString &); + //A is or under B + static bool isOrUnder(QString pathA, QString pathB); //chinese character to pinyin 
static QMap map_chinese2pinyin; @@ -81,6 +86,9 @@ public: static void getXlsxTextContent(QString &path, QString &textcontent); static void getPdfTextContent(QString &path, QString &textcontent); static void getTxtContent(QString &path, QString &textcontent); + + static bool openFile(QString &path, bool openInDir = false); + static bool copyPath(QString &path); static size_t _max_index_count; static size_t _current_index_count; //this one has been Abandoned,do not use it. static unsigned short _index_status; diff --git a/libsearch/global-settings.cpp b/libsearch/global-settings.cpp index 32df70c..3aebc89 100644 --- a/libsearch/global-settings.cpp +++ b/libsearch/global-settings.cpp @@ -83,13 +83,13 @@ GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent) { connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) { if(key == STYLE_NAME_KEY) { //当前主题改变时也发出paletteChanged信号,通知主界面刷新 - qApp->paletteChanged(qApp->palette()); m_cache.remove(STYLE_NAME_KEY); m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString()); - } else if(key == FONT_SIZE_KEY) { qApp->paletteChanged(qApp->palette()); + } else if(key == FONT_SIZE_KEY) { m_cache.remove(FONT_SIZE_KEY); m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble()); + qApp->paletteChanged(qApp->palette()); } else if (key == ICON_THEME_KEY) { qApp->paletteChanged(qApp->palette()); } @@ -146,24 +146,28 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem m_block_dirs_settings->remove(path); return true; } - if(!path.startsWith("/home")) { +// if(!path.startsWith("/home")) { // returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!")); - returnCode = PATH_NOT_IN_HOME; - return false; - } +// returnCode = PATH_NOT_IN_HOME; +// return false; +// } //why QSetting's key can't start with "/"?? 
QString pathKey = path.right(path.length() - 1); + if (pathKey.endsWith(QLatin1Char('/'))) { + pathKey = pathKey.mid(0, pathKey.length() - 1); + } + QStringList blockDirs = m_block_dirs_settings->allKeys(); for(QString i : blockDirs) { - if(pathKey.startsWith(i)) { + if(FileUtils::isOrUnder(pathKey, i)) { // returnCode = QString(tr("My parent folder has been blocked!")); returnCode = PATH_PARENT_BLOCKED; return false; } - if(i.startsWith(pathKey)) + if(FileUtils::isOrUnder(i, pathKey)) m_block_dirs_settings->remove(i); } m_block_dirs_settings->setValue(pathKey, "0"); diff --git a/libsearch/global-settings.h b/libsearch/global-settings.h index 4a61cc2..de520ab 100644 --- a/libsearch/global-settings.h +++ b/libsearch/global-settings.h @@ -36,6 +36,7 @@ #include #include #include "libsearch_global.h" +#include "file-utils.h" #define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise" #define TRANSPARENCY_KEY "transparency" diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp index 5be7af1..445bdb5 100644 --- a/libsearch/index/construct-document.cpp +++ b/libsearch/index/construct-document.cpp @@ -34,8 +34,8 @@ ConstructDocumentForPath::ConstructDocumentForPath(QVector list) { void ConstructDocumentForPath::run() { // qDebug()<<"ConstructDocumentForPath"; - if(!Zeeker::_doc_list_path) - Zeeker::_doc_list_path = new QList; +// if(!Zeeker::_doc_list_path) +// Zeeker::_doc_list_path = new QVector; // qDebug()<<_doc_list_path->size(); QString index_text = m_list.at(0).toLower(); QString sourcePath = m_list.at(1); @@ -87,9 +87,9 @@ void ConstructDocumentForPath::run() { } // QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc)); - Zeeker::_mutex_doc_list_path.lock(); - Zeeker::_doc_list_path->append(doc); - Zeeker::_mutex_doc_list_path.unlock(); + IndexGenerator::_mutex_doc_list_path.lock(); + IndexGenerator::_doc_list_path.append(doc); + 
IndexGenerator::_mutex_doc_list_path.unlock(); // qDebug()<<"ConstructDocumentForPath finish"; return; } @@ -102,32 +102,39 @@ ConstructDocumentForContent::ConstructDocumentForContent(QString path) { void ConstructDocumentForContent::run() { // qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId(); // 构造文本索引的document - if(!Zeeker::_doc_list_content) - Zeeker::_doc_list_content = new QList; +// if(!Zeeker::_doc_list_content) +// Zeeker::_doc_list_content = new QVector; QString content; FileReader::getTextContent(m_path, content); if(content.isEmpty()) return; - QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); - QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); - - QVector term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString()); - + //QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); + //QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); Document doc; doc.setData(content); - doc.setUniqueTerm(uniqueterm); - doc.addTerm(upTerm); + //doc.setUniqueTerm(uniqueterm); + doc.setUniqueTerm(FileUtils::makeDocUterm(m_path)); + //doc.addTerm(upTerm); + doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); doc.addValue(m_path); - for(int i = 0; i < term.size(); ++i) { - doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); + //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. 
+ content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); + +// QVector term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000)); + std::vector term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString()); + + for(size_t i = 0; i < term.size(); ++i) { + doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast(term.at(i).weight)); } - Zeeker::_mutex_doc_list_content.lock(); - Zeeker::_doc_list_content->append(doc); - Zeeker::_mutex_doc_list_content.unlock(); + IndexGenerator::_mutex_doc_list_content.lock(); + IndexGenerator::_doc_list_content.append(doc); + IndexGenerator::_mutex_doc_list_content.unlock(); content.clear(); content.squeeze(); + term.clear(); + term.shrink_to_fit(); return; } diff --git a/libsearch/index/data-queue.cpp b/libsearch/index/data-queue.cpp new file mode 100644 index 0000000..b06cd4f --- /dev/null +++ b/libsearch/index/data-queue.cpp @@ -0,0 +1,6 @@ +#include "data-queue.h" + +DataQueue::DataQueue() +{ + +} diff --git a/libsearch/index/data-queue.h b/libsearch/index/data-queue.h new file mode 100644 index 0000000..2af8741 --- /dev/null +++ b/libsearch/index/data-queue.h @@ -0,0 +1,11 @@ +#ifndef DATAQUEUE_H +#define DATAQUEUE_H + + +class DataQueue +{ +public: + DataQueue(); +}; + +#endif // DATAQUEUE_H diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 68b21df..57f907a 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -37,6 +37,17 @@ void Document::addPosting(std::string term, QVector offset, int weight) } } +void Document::addPosting(std::string &term, std::vector &offset, int weight) { + if(term == "") + return; + if(term.length() > 240) + term = QString::fromStdString(term).left(30).toStdString(); + + for(size_t i : offset) { + m_document.add_posting(term, i, weight); + } +} + void Document::addPosting(std::string term, unsigned int offset, int weight) { if(term == "") 
return; @@ -52,6 +63,12 @@ void Document::addTerm(QString term) { m_document.add_term(term.toStdString()); } +void Document::addTerm(std::string term) { + if(term.empty()) + return; + m_document.add_term(term); +} + void Document::addValue(QString value) { m_document.add_value(1, value.toStdString()); } @@ -62,12 +79,20 @@ void Document::setUniqueTerm(QString term) { m_document.add_term(term.toStdString()); // m_unique_term = new QString(term); - m_unique_term = std::move(term); + m_unique_term = std::move(term.toStdString()); } + +void Document::setUniqueTerm(std::string term) { + if(term.empty()) + return; + m_document.add_term(term); + m_unique_term = term; +} + std::string Document::getUniqueTerm() { // qDebug()<<"m_unique_term!"<<*m_unique_term; // qDebug() << QString::fromStdString(m_unique_term.toStdString()); - return m_unique_term.toStdString(); + return m_unique_term;//.toStdString(); } void Document::setIndexText(QStringList indexText) { diff --git a/libsearch/index/document.h b/libsearch/index/document.h index d4549e8..84e6262 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -41,10 +41,13 @@ public: } void setData(QString &data); void addPosting(std::string term, QVector offset, int weight = 1); + void addPosting(std::string &term, std::vector &offset, int weight = 1); void addPosting(std::string term, unsigned int offset, int weight = 1); void addTerm(QString term); + void addTerm(std::string term); void addValue(QString value); void setUniqueTerm(QString term); + void setUniqueTerm(std::string term); std::string getUniqueTerm(); void setIndexText(QStringList indexText); QStringList getIndexText(); @@ -52,7 +55,8 @@ public: private: Xapian::Document m_document; QStringList m_index_text; - QString m_unique_term; + //QString m_unique_term; + std::string m_unique_term; }; } diff --git a/libsearch/index/file-search-plugin.cpp b/libsearch/index/file-search-plugin.cpp new file mode 100644 index 0000000..3b42d88 --- /dev/null +++ 
b/libsearch/index/file-search-plugin.cpp @@ -0,0 +1,232 @@ +#include "file-search-plugin.h" +#include "search-manager.h" +#include +#include +#include +using namespace Zeeker; + +FileSearchPlugin::FileSearchPlugin(QObject *parent) : QObject(parent) +{ + SearchPluginIface::Actioninfo open { 0, tr("Open")}; + SearchPluginIface::Actioninfo Openpath { 1, tr("Open path")}; + SearchPluginIface::Actioninfo CopyPath { 2, tr("Copy Path")}; + m_actionInfo << open << Openpath << CopyPath; + m_pool.setMaxThreadCount(2); + m_pool.setExpiryTimeout(1000); +} + +const QString FileSearchPlugin::name() +{ + return tr("File Search"); +} + +const QString FileSearchPlugin::description() +{ + return tr("File search."); +} + +QString FileSearchPlugin::getPluginName() +{ + return tr("File Search"); +} + +void Zeeker::FileSearchPlugin::KeywordSearch(QString keyword, DataQueue *searchResult) +{ + SearchManager::m_mutex1.lock(); + ++SearchManager::uniqueSymbol1; + SearchManager::m_mutex1.unlock(); + + if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { + DirectSearch *directSearch; + directSearch = new DirectSearch(keyword, searchResult, FILE_SEARCH_VALUE, SearchManager::uniqueSymbol1); + m_pool.start(directSearch); + } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { + FileSearch *filesearch; + filesearch = new FileSearch(searchResult, SearchManager::uniqueSymbol1, keyword, FILE_SEARCH_VALUE, 1, 0, 5); + m_pool.start(filesearch); + } +} + +QList FileSearchPlugin::getActioninfo(int type) +{ + return m_actionInfo; +} + +void FileSearchPlugin::openAction(int actionkey, QString key, int type) +{ + //TODO add some return message here. 
+ switch (actionkey) { + case 0: + FileUtils::openFile(key); + break; + case 1: + FileUtils::openFile(key, true); + break; //do not fall through: "Open path" must not also overwrite the clipboard + case 2: + FileUtils::copyPath(key); + break; + } +} + +bool FileSearchPlugin::isPreviewEnable(QString key, int type) +{ + return true; +} + +QWidget *FileSearchPlugin::previewPage(QString key, int type, QWidget *parent) +{ + QWidget *previewPage = new QWidget(parent); + QHBoxLayout * previewLyt = new QHBoxLayout(previewPage); + previewLyt->setContentsMargins(0, 0, 0, 0); + QLabel *label = new QLabel(previewPage); + previewLyt->addWidget(label); + label->setFixedHeight(120); + previewPage->setFixedSize(120,120); + previewLyt->setAlignment(Qt::AlignCenter); + label->setPixmap(FileUtils::getFileIcon(QUrl::fromLocalFile(key).toString()).pixmap(120,120)); + return previewPage; +} + +DirSearchPlugin::DirSearchPlugin(QObject *parent) : QObject(parent) +{ + SearchPluginIface::Actioninfo open { 0, tr("Open")}; + SearchPluginIface::Actioninfo Openpath { 1, tr("Open path")}; + SearchPluginIface::Actioninfo CopyPath { 2, tr("Copy Path")}; + m_actionInfo << open << Openpath << CopyPath; + m_pool.setMaxThreadCount(2); + m_pool.setExpiryTimeout(1000); +} + +const QString DirSearchPlugin::name() +{ + return tr("Dir Search"); +} + +const QString DirSearchPlugin::description() +{ + return tr("Dir search."); +} + +QString DirSearchPlugin::getPluginName() +{ + return tr("Dir Search"); +} + +void Zeeker::DirSearchPlugin::KeywordSearch(QString keyword, DataQueue *searchResult) +{ + SearchManager::m_mutex2.lock(); + ++SearchManager::uniqueSymbol2; + SearchManager::m_mutex2.unlock(); + + if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { + DirectSearch *directSearch; + directSearch = new DirectSearch(keyword, searchResult, DIR_SEARCH_VALUE, SearchManager::uniqueSymbol2); + m_pool.start(directSearch); + } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { + FileSearch *filesearch; + filesearch = new 
FileSearch(searchResult, SearchManager::uniqueSymbol2, keyword, DIR_SEARCH_VALUE, 1, 0, 5); + m_pool.start(filesearch); + } +} + +QList DirSearchPlugin::getActioninfo(int type) +{ + return m_actionInfo; +} + +void DirSearchPlugin::openAction(int actionkey, QString key, int type) +{ + //TODO add some return message here. + switch (actionkey) { + case 0: + FileUtils::openFile(key); + break; + case 1: + FileUtils::openFile(key, true); + break; //do not fall through: "Open path" must not also overwrite the clipboard + case 2: + FileUtils::copyPath(key); + break; + } +} + +bool DirSearchPlugin::isPreviewEnable(QString key, int type) +{ + return false; +} + +QWidget *DirSearchPlugin::previewPage(QString key, int type, QWidget *parent) +{ + return nullptr; +} + +FileContengSearchPlugin::FileContengSearchPlugin(QObject *parent) : QObject(parent) +{ + SearchPluginIface::Actioninfo open { 0, tr("Open")}; + SearchPluginIface::Actioninfo Openpath { 1, tr("Open path")}; + SearchPluginIface::Actioninfo CopyPath { 2, tr("Copy Path")}; + m_actionInfo << open << Openpath << CopyPath; + m_pool.setMaxThreadCount(2); + m_pool.setExpiryTimeout(1000); +} + +const QString FileContengSearchPlugin::name() +{ + return tr("File Content Search"); +} + +const QString FileContengSearchPlugin::description() +{ + return tr("File content search."); +} + +QString FileContengSearchPlugin::getPluginName() +{ + return tr("File content search"); +} + +void Zeeker::FileContengSearchPlugin::KeywordSearch(QString keyword, DataQueue *searchResult) +{ + SearchManager::m_mutex3.lock(); + ++SearchManager::uniqueSymbol3; + SearchManager::m_mutex3.unlock(); + + if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { + return; + } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { + FileContentSearch *fileContentSearch; + fileContentSearch = new FileContentSearch(searchResult, SearchManager::uniqueSymbol3, keyword, 0, 5); + m_pool.start(fileContentSearch); + } +} + +QList FileContengSearchPlugin::getActioninfo(int type) +{ + return 
m_actionInfo; +} + +void FileContengSearchPlugin::openAction(int actionkey, QString key, int type) +{ + //TODO add some return message here. + switch (actionkey) { + case 0: + FileUtils::openFile(key); + break; + case 1: + FileUtils::openFile(key, true); + break; //do not fall through: "Open path" must not also overwrite the clipboard + case 2: + FileUtils::copyPath(key); + break; + } +} + +bool FileContengSearchPlugin::isPreviewEnable(QString key, int type) +{ + return false; +} + +QWidget *FileContengSearchPlugin::previewPage(QString key, int type, QWidget *parent) +{ + return nullptr; +} diff --git a/libsearch/index/file-search-plugin.h b/libsearch/index/file-search-plugin.h new file mode 100644 index 0000000..7ebc137 --- /dev/null +++ b/libsearch/index/file-search-plugin.h @@ -0,0 +1,87 @@ +#ifndef FILESEARCHPLUGIN_H +#define FILESEARCHPLUGIN_H + +#include +#include + +#include "libsearch_global.h" +#include "search-plugin-iface.h" +#include "common.h" +namespace Zeeker { +//internal plugin +class LIBSEARCH_EXPORT FileSearchPlugin : public QObject, public SearchPluginIface +{ + Q_OBJECT +public: + FileSearchPlugin(QObject *parent = nullptr); + PluginType pluginType() {return PluginType::SearchPlugin;} + const QString name(); + const QString description(); + const QIcon icon() {return QIcon::fromTheme("folder");} + void setEnable(bool enable) {m_enable = enable;} + bool isEnable() {return m_enable;} + QString getPluginName(); + + void KeywordSearch(QString keyword,DataQueue *searchResult); + QList getActioninfo(int type); + void openAction(int actionkey, QString key, int type = 0); + bool isPreviewEnable(QString key, int type); + QWidget *previewPage(QString key, int type, QWidget *parent = nullptr); + +private: + bool m_enable = true; + QList m_actionInfo; + QThreadPool m_pool; +}; + +class LIBSEARCH_EXPORT DirSearchPlugin : public QObject, public SearchPluginIface +{ + Q_OBJECT +public: + DirSearchPlugin(QObject *parent = nullptr); + PluginType pluginType() {return PluginType::SearchPlugin;} + const QString name(); + const 
QString description(); + const QIcon icon() {return QIcon::fromTheme("folder");} + void setEnable(bool enable) {m_enable = enable;} + bool isEnable() {return m_enable;} + QString getPluginName(); + + void KeywordSearch(QString keyword,DataQueue *searchResult); + QList getActioninfo(int type); + void openAction(int actionkey, QString key, int type = 0); + bool isPreviewEnable(QString key, int type); + QWidget *previewPage(QString key, int type, QWidget *parent = nullptr); +private: + bool m_enable = true; + QList m_actionInfo; + QThreadPool m_pool; +}; + +class LIBSEARCH_EXPORT FileContengSearchPlugin : public QObject, public SearchPluginIface +{ + Q_OBJECT +public: + FileContengSearchPlugin(QObject *parent = nullptr); + PluginType pluginType() {return PluginType::SearchPlugin;} + const QString name(); + const QString description(); + const QIcon icon() {return QIcon::fromTheme("folder");} + void setEnable(bool enable) {m_enable = enable;} + bool isEnable() {return m_enable;} + QString getPluginName(); + + void KeywordSearch(QString keyword,DataQueue *searchResult); + QList getActioninfo(int type); + void openAction(int actionkey, QString key, int type = 0); + bool isPreviewEnable(QString key, int type); + QWidget *previewPage(QString key, int type, QWidget *parent = nullptr); +private: + bool m_enable = true; + QList m_actionInfo; + QThreadPool m_pool; +}; +} + + +#endif // FILESEARCHPLUGIN_H diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp index b8fafb9..15f6429 100644 --- a/libsearch/index/first-index.cpp +++ b/libsearch/index/first-index.cpp @@ -26,6 +26,8 @@ //#define DELETE_QUEUE(a ) using namespace Zeeker; FirstIndex::FirstIndex() { + m_pool.setMaxThreadCount(2); + m_pool.setExpiryTimeout(100); } FirstIndex::~FirstIndex() { @@ -46,7 +48,48 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) { // qDebug() << "there are some shit here"<q_index->enqueue(QVector() << fileInfo.fileName() << fileInfo.absoluteFilePath() << 
QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0")); if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) { - this->q_content_index->enqueue(fileInfo.absoluteFilePath()); + //this->q_content_index->enqueue(fileInfo.absoluteFilePath()); + if (fileInfo.fileName().split(".").last() == "docx") { + QuaZip file(fileInfo.absoluteFilePath()); + if(!file.open(QuaZip::mdUnzip)) + return; + if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) + return; + QuaZipFile fileR(&file); + this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小 + file.close(); + } else if (fileInfo.fileName().split(".").last() == "pptx") { + QuaZip file(fileInfo.absoluteFilePath()); + if(!file.open(QuaZip::mdUnzip)) + return; + QString prefix("ppt/slides/slide"); + qint64 fileSize(0); + qint64 fileIndex(0); + for(QString i : file.getFileNameList()) { + if(i.startsWith(prefix)){ + QString name = prefix + QString::number(fileIndex + 1) + ".xml"; + fileIndex++; + if(!file.setCurrentFile(name)) { + continue; + } + QuaZipFile fileR(&file); + fileSize += fileR.usize(); + } + } + file.close(); + this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小 + } else if (fileInfo.fileName().split(".").last() == "xlsx") { + QuaZip file(fileInfo.absoluteFilePath()); + if(!file.open(QuaZip::mdUnzip)) + return; + if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) + return; + QuaZipFile fileR(&file); + this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小 + file.close(); + } else { + this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size())); + } } } @@ -54,19 +97,6 @@ void FirstIndex::run() { QTime t1 = QTime::currentTime(); // Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is 
used to control the order of child processes' running. - QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search"); - if(!fifoDir.exists()) - qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath()); - - unlink(UKUI_SEARCH_PIPE_PATH); - int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777); - if(retval == -1) { - qCritical() << "creat fifo error!!"; - syslog(LOG_ERR, "creat fifo error!!\n"); - assert(false); - return; - } - qDebug() << "create fifo success\n"; QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString(); QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString(); @@ -90,8 +120,9 @@ void FirstIndex::run() { this->q_index = new QQueue>(); //this->q_content_index = new QQueue(); - NEW_QUEUE(this->q_content_index); + //NEW_QUEUE(this->q_content_index); // this->mlm = new MessageListManager(); + this->q_content_index = new QQueue>(); int fifo_fd; char buffer[2]; @@ -110,7 +141,6 @@ void FirstIndex::run() { ++FileUtils::_index_status; - pid_t pid; pid = fork(); if(pid == 0) { @@ -129,6 +159,7 @@ void FirstIndex::run() { p_indexGenerator = IndexGenerator::getInstance(true, this); } + //TODO Fix these weird code. 
QSemaphore sem(5); QMutex mutex1, mutex2, mutex3; mutex1.lock(); @@ -144,40 +175,53 @@ void FirstIndex::run() { qDebug() << "max_index_count:" << FileUtils::_max_index_count; sem.release(5); // }); - QtConcurrent::run([&]() { + QtConcurrent::run(&m_pool, [&]() { sem.acquire(2); mutex2.unlock(); qDebug() << "index start;"; - QQueue>* tmp = new QQueue>(); + QQueue>* tmp1 = new QQueue>(); while(!this->q_index->empty()) { for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) { - tmp->enqueue(this->q_index->dequeue()); + tmp1->enqueue(this->q_index->dequeue()); } - this->p_indexGenerator->creatAllIndex(tmp); - tmp->clear(); + this->p_indexGenerator->creatAllIndex(tmp1); + tmp1->clear(); } -// this->p_indexGenerator->setSynonym(); - delete tmp; + delete tmp1; qDebug() << "index end;"; sem.release(2); }); - QtConcurrent::run([&]() { + QtConcurrent::run(&m_pool,[&]() { sem.acquire(2); mutex3.unlock(); - QQueue* tmp = new QQueue(); + QQueue* tmp2 = new QQueue(); qDebug() << "q_content_index:" << q_content_index->size(); while(!this->q_content_index->empty()) { -// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){ - for(size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i) { - tmp->enqueue(this->q_content_index->dequeue()); + // for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){ + qint64 fileSize = 0; + //修改一次处理的数据量,从30个文件改为文件总大小为50M以下,50M为暂定值--jxx20210519 + for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->q_content_index->empty()); ++i) { + QPair tempPair = this->q_content_index->dequeue(); + fileSize += tempPair.second; + if (fileSize > 52428800 ) { + if (tmp2->size() == 0) { + tmp2->enqueue(tempPair.first); + break; + } + this->q_content_index->enqueue(tempPair); + break; + } + tmp2->enqueue(tempPair.first); } - this->p_indexGenerator->creatAllIndex(tmp); - tmp->clear(); + // qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << 
"<<<<<<<<<<<<<<<<<<<"; + this->p_indexGenerator->creatAllIndex(tmp2); + tmp2->clear(); } - delete tmp; + delete tmp2; qDebug() << "content index end;"; sem.release(2); }); + mutex1.lock(); mutex2.lock(); mutex3.lock(); diff --git a/libsearch/index/first-index.h b/libsearch/index/first-index.h index 7455992..ab1d23e 100644 --- a/libsearch/index/first-index.h +++ b/libsearch/index/first-index.h @@ -56,13 +56,16 @@ private: bool bool_dataBaseStatusOK = false; bool bool_dataBaseExist = false; IndexGenerator* p_indexGenerator = nullptr; + QThreadPool m_pool; //here should be refact // MessageListManager* mlm; //test QQueue>* q_index; - QQueue* q_content_index; +// QQueue* q_content_index; + //修改QQueue存储数据为QPair,增加存储文件大小数据便于处理时统计--jxx20210519 + QQueue>* q_content_index; const QMap targetFileTypeMap = { std::map::value_type("doc", true), diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index 8c02122..06efc6e 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -28,9 +28,8 @@ #include "file-utils.h" #include "index-generator.h" #include "chinese-segmentation.h" -#include "construct-document.h" #include - +#include #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() @@ -39,10 +38,14 @@ using namespace Zeeker; static IndexGenerator *global_instance = nullptr; QMutex IndexGenerator::m_mutex; -QList *Zeeker::_doc_list_path; -QMutex Zeeker::_mutex_doc_list_path; -QList *Zeeker::_doc_list_content; -QMutex Zeeker::_mutex_doc_list_content; +//QVector *Zeeker::_doc_list_path; +//QMutex Zeeker::_mutex_doc_list_path; +//QVector *Zeeker::_doc_list_content; +//QMutex Zeeker::_mutex_doc_list_content; +QMutex IndexGenerator::_mutex_doc_list_path; +QMutex 
IndexGenerator::_mutex_doc_list_content; +QVector IndexGenerator::_doc_list_path = QVector(); +QVector IndexGenerator::_doc_list_content = QVector(); IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) { QMutexLocker locker(&m_mutex); @@ -61,45 +64,33 @@ bool IndexGenerator::setIndexdataPath() { //文件名索引 bool IndexGenerator::creatAllIndex(QQueue > *messageList) { -// FileUtils::_index_status |= 0x1; -// qDebug() << messageList->size(); HandlePathList(messageList); - if(_doc_list_path == NULL) { +// if(_doc_list_path == NULL) { +// return false; +// } + if(IndexGenerator::_doc_list_path.isEmpty()) { return false; } qDebug() << "begin creatAllIndex"; -// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "0"); try { -// m_indexer = new Xapian::TermGenerator(); -// m_indexer.set_database(*m_database_path); - //可以实现拼写纠正 -// m_indexer->set_flags(Xapian::TermGenerator::FLAG_SPELLING); -// m_indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME); - -// int count =0; - - for(auto i : *_doc_list_path) { + for(auto i : IndexGenerator::_doc_list_path) { insertIntoDatabase(i); -// if(++count > 8999){ -// count = 0; -// m_database_path->commit(); -// } } m_database_path->commit(); } catch(const Xapian::Error &e) { qWarning() << "creatAllIndex fail!" 
<< QString::fromStdString(e.get_description()); //need a record - IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1"); -// FileUtils::_index_status &= ~0x1; + IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1"); assert(false); } -// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2"); qDebug() << "finish creatAllIndex"; -// FileUtils::_index_status &= ~0x1; - _doc_list_path->clear(); - delete _doc_list_path; - _doc_list_path = nullptr; + IndexGenerator::_doc_list_path.clear(); + IndexGenerator::_doc_list_path.squeeze(); + QVector().swap(IndexGenerator::_doc_list_path); + +// delete _doc_list_path; +// _doc_list_path = nullptr; return true; } //文件内容索引 @@ -107,16 +98,19 @@ bool IndexGenerator::creatAllIndex(QQueue *messageList) { // FileUtils::_index_status |= 0x2; HandlePathList(messageList); qDebug() << "begin creatAllIndex for content"; - if(_doc_list_content == NULL) { +// if(_doc_list_content == NULL) { +// return false; +// } + if(IndexGenerator::_doc_list_content.isEmpty()) { return false; } - int size = _doc_list_content->size(); + int size = IndexGenerator::_doc_list_content.size(); qDebug() << "begin creatAllIndex for content" << size; if(!size == 0) { // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0"); try { int count = 0; - for(auto i : *_doc_list_content) { + for(auto i : IndexGenerator::_doc_list_content) { insertIntoContentDatabase(i); if(++count > 999) { count = 0; @@ -133,9 +127,11 @@ bool IndexGenerator::creatAllIndex(QQueue *messageList) { // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); // FileUtils::_index_status &= ~0x2; qDebug() << "finish creatAllIndex for content"; - _doc_list_content->clear(); - delete _doc_list_content; - _doc_list_content = nullptr; + + IndexGenerator::_doc_list_content.clear(); + IndexGenerator::_doc_list_content.squeeze(); + QVector().swap(IndexGenerator::_doc_list_content); + malloc_trim(0); } Q_EMIT 
this->transactionFinished(); return true; @@ -297,7 +293,7 @@ void IndexGenerator::HandlePathList(QQueue *messageList) { return; } - +//deprecated Document IndexGenerator::GenerateDocument(const QVector &list) { Document doc; // qDebug()< &list) { return doc; } - +//deprecated Document IndexGenerator::GenerateContentDocument(const QString &path) { // 构造文本索引的document QString content; @@ -389,7 +385,7 @@ bool IndexGenerator::isIndexdataExist() { } - +//deprecated QStringList IndexGenerator::IndexSearch(QString indexText) { QStringList searchResult; try { @@ -455,96 +451,60 @@ QStringList IndexGenerator::IndexSearch(QString indexText) { return searchResult; } -//void IndexGenerator::setSynonym() -//{ -// try -// { -// m_database_path->add_synonym("a","A"); -// m_database_path->add_synonym("b","B"); -// m_database_path->add_synonym("c","C"); -// m_database_path->add_synonym("d","D"); -// m_database_path->add_synonym("e","A"); -// m_database_path->add_synonym("f","F"); -// m_database_path->add_synonym("g","G"); -// m_database_path->add_synonym("h","H"); -// m_database_path->add_synonym("i","I"); -// m_database_path->add_synonym("j","J"); -// m_database_path->add_synonym("k","K"); -// m_database_path->add_synonym("l","L"); -// m_database_path->add_synonym("m","M"); -// m_database_path->add_synonym("n","N"); -// m_database_path->add_synonym("o","O"); -// m_database_path->add_synonym("p","P"); -// m_database_path->add_synonym("q","Q"); -// m_database_path->add_synonym("r","R"); -// m_database_path->add_synonym("s","S"); -// m_database_path->add_synonym("t","T"); -// m_database_path->add_synonym("u","U"); -// m_database_path->add_synonym("v","V"); -// m_database_path->add_synonym("w","W"); -// m_database_path->add_synonym("x","X"); -// m_database_path->add_synonym("y","Y"); -// m_database_path->add_synonym("z","Z"); - -// m_database_path->add_synonym("A","a"); -// m_database_path->add_synonym("B","b"); -// m_database_path->add_synonym("C","c"); -// 
m_database_path->add_synonym("D","d"); -// m_database_path->add_synonym("E","e"); -// m_database_path->add_synonym("F","f"); -// m_database_path->add_synonym("G","g"); -// m_database_path->add_synonym("H","h"); -// m_database_path->add_synonym("I","i"); -// m_database_path->add_synonym("J","j"); -// m_database_path->add_synonym("K","k"); -// m_database_path->add_synonym("L","a"); -// m_database_path->add_synonym("M","m"); -// m_database_path->add_synonym("N","n"); -// m_database_path->add_synonym("O","o"); -// m_database_path->add_synonym("P","p"); -// m_database_path->add_synonym("Q","q"); -// m_database_path->add_synonym("R","r"); -// m_database_path->add_synonym("S","s"); -// m_database_path->add_synonym("T","t"); -// m_database_path->add_synonym("U","u"); -// m_database_path->add_synonym("V","v"); -// m_database_path->add_synonym("W","w"); -// m_database_path->add_synonym("X","x"); -// m_database_path->add_synonym("Y","y"); -// m_database_path->add_synonym("Z","z"); -// m_database_path->commit(); -// } -// catch(const Xapian::Error &e) -// { -// qWarning() <isEmpty()) return true; - for(int i = 0; i < list->size(); i++) { - QString doc = list->at(i); - std::string uniqueterm = FileUtils::makeDocUterm(doc); - try { + try { + for(int i = 0; i < list->size(); i++) { + QString doc = list->at(i); + std::string uniqueterm = FileUtils::makeDocUterm(doc); qDebug() << "--delete start--"; m_database_path->delete_document(uniqueterm); m_database_content->delete_document(uniqueterm); qDebug() << "delete path" << doc; qDebug() << "delete md5" << QString::fromStdString(uniqueterm); - m_database_path->commit(); - m_database_content->commit(); qDebug() << "--delete finish--"; -// qDebug()<<"m_database_path->get_lastdocid()!!!"<get_lastdocid(); - -// qDebug()<<"m_database_path->get_doccount()!!!"<get_doccount(); - } catch(const Xapian::Error &e) { - qWarning() << QString::fromStdString(e.get_description()); - return false; + // 
qDebug()<<"m_database_path->get_lastdocid()!!!"<get_lastdocid(); + // qDebug()<<"m_database_path->get_doccount()!!!"<get_doccount(); } + m_database_path->commit(); + m_database_content->commit(); + } catch(const Xapian::Error &e) { + qWarning() << QString::fromStdString(e.get_description()); + return false; } + Q_EMIT this->transactionFinished(); return true; } +bool IndexGenerator::updateIndex(QVector *pendingFiles) +{ + QQueue> *fileIndexInfo = new QQueue>; + QQueue *fileContentIndexInfo = new QQueue; + QStringList *deleteList = new QStringList; + for(PendingFile file : *pendingFiles) { + if(file.shouldRemoveIndex()) { + + deleteList->append(file.path()); + continue; + } + fileIndexInfo->append(QVector() << file.path().section("/" , -1) << file.path() << QString(file.isDir() ? "1" : "0")); + if((!file.path().split(".").isEmpty()) && (true == targetFileTypeMap[file.path().section("/" , -1) .split(".").last()])) + fileContentIndexInfo->append(file.path()); + } + if(!deleteList->isEmpty()) { + deleteAllIndex(deleteList); + } + if(!fileIndexInfo->isEmpty()) { + creatAllIndex(fileIndexInfo); + } + if(!fileContentIndexInfo->isEmpty()) { + creatAllIndex(fileContentIndexInfo); + } + delete fileIndexInfo; + delete fileContentIndexInfo; + return true; +} + diff --git a/libsearch/index/index-generator.h b/libsearch/index/index-generator.h index bc614b2..873a6cd 100644 --- a/libsearch/index/index-generator.h +++ b/libsearch/index/index-generator.h @@ -29,18 +29,22 @@ #include #include //#include +#include "construct-document.h" #include "index-status-recorder.h" #include "document.h" #include "file-reader.h" #include "common.h" +#include "pending-file.h" namespace Zeeker { -extern QList *_doc_list_path; -extern QMutex _mutex_doc_list_path; -extern QList *_doc_list_content; -extern QMutex _mutex_doc_list_content; +//extern QVector *_doc_list_path; +//extern QMutex _mutex_doc_list_path; +//extern QVector *_doc_list_content; +//extern QMutex _mutex_doc_list_content; class 
IndexGenerator : public QObject { + friend class ConstructDocumentForPath; + friend class ConstructDocumentForContent; Q_OBJECT public: static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr); @@ -58,6 +62,7 @@ public Q_SLOTS: bool creatAllIndex(QQueue> *messageList); bool creatAllIndex(QQueue *messageList); bool deleteAllIndex(QStringList *pathlist); + bool updateIndex(QVector *pendingFiles); private: explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr); @@ -72,8 +77,10 @@ private: void insertIntoDatabase(Document& doc); void insertIntoContentDatabase(Document& doc); -// QList *m_doc_list_path; //for path index -// QList *m_doc_list_content; // for text content index + static QVector _doc_list_path; + static QMutex _mutex_doc_list_path; + static QVector _doc_list_content; + static QMutex _mutex_doc_list_content; QMap m_index_map; QString m_index_data_path; Xapian::WritableDatabase* m_database_path; diff --git a/libsearch/index/index-status-recorder.cpp b/libsearch/index/index-status-recorder.cpp index 63cba32..9089722 100644 --- a/libsearch/index/index-status-recorder.cpp +++ b/libsearch/index/index-status-recorder.cpp @@ -13,8 +13,10 @@ IndexStatusRecorder *IndexStatusRecorder::getInstance() void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value) { + m_mutex.lock(); m_status->setValue(key, value); m_status->sync(); + m_mutex.unlock(); } const QVariant IndexStatusRecorder::getStatus(const QString &key) diff --git a/libsearch/index/index-status-recorder.h b/libsearch/index/index-status-recorder.h index 25f8009..bf65850 100644 --- a/libsearch/index/index-status-recorder.h +++ b/libsearch/index/index-status-recorder.h @@ -4,9 +4,11 @@ #include #include #include +#include #define CONTENT_INDEX_DATABASE_STATE "content_index_database_state" #define INDEX_DATABASE_STATE "index_database_state" #define INOTIFY_NORMAL_EXIT "inotify_normal_exit" +#define PENDING_FILE_QUEUE_FINISH 
"pending_file_queue_finish" #define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf" namespace Zeeker { //fixme: we need a better way to record index status. @@ -21,6 +23,7 @@ public: private: explicit IndexStatusRecorder(QObject *parent = nullptr); QSettings *m_status; + QMutex m_mutex; }; } diff --git a/libsearch/index/index.pri b/libsearch/index/index.pri index 6ceadc6..0b74560 100644 --- a/libsearch/index/index.pri +++ b/libsearch/index/index.pri @@ -4,10 +4,14 @@ HEADERS += \ $$PWD/construct-document.h \ $$PWD/document.h \ $$PWD/file-reader.h \ + $$PWD/file-search-plugin.h \ $$PWD/first-index.h \ $$PWD/index-generator.h \ $$PWD/index-status-recorder.h \ $$PWD/inotify-index.h \ + $$PWD/inotify-watch.h \ + $$PWD/pending-file-queue.h \ + $$PWD/pending-file.h \ $$PWD/search-manager.h \ $$PWD/searchmethodmanager.h \ $$PWD/traverse_bfs.h \ @@ -17,10 +21,14 @@ SOURCES += \ $$PWD/construct-document.cpp \ $$PWD/document.cpp \ $$PWD/file-reader.cpp \ + $$PWD/file-search-plugin.cpp \ $$PWD/first-index.cpp \ $$PWD/index-generator.cpp \ $$PWD/index-status-recorder.cpp \ $$PWD/inotify-index.cpp \ + $$PWD/inotify-watch.cpp \ + $$PWD/pending-file-queue.cpp \ + $$PWD/pending-file.cpp \ $$PWD/search-manager.cpp \ $$PWD/searchmethodmanager.cpp \ $$PWD/traverse_bfs.cpp \ diff --git a/libsearch/index/inotify-index.cpp b/libsearch/index/inotify-index.cpp index 0396f9e..04351f1 100644 --- a/libsearch/index/inotify-index.cpp +++ b/libsearch/index/inotify-index.cpp @@ -18,6 +18,7 @@ * */ #include "inotify-index.h" +#include #define CREATE_FILE_NAME_INDEX \ indexQueue->enqueue(QVector() << QString(event->name) << QString(currentPath[event->wd] + '/' + event->name) << QString((event->mask & IN_ISDIR) ? 
"1" : "0")); \ @@ -51,7 +52,7 @@ InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path) { UkuiSearchQDBus usQDBus; usQDBus.setInotifyMaxUserWatches(); qDebug() << "setInotifyMaxUserWatches end"; - + m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this); } InotifyIndex::~InotifyIndex() { @@ -341,7 +342,24 @@ void InotifyIndex::run() { qDebug() << "select timeout!"; ::free(read_timeout); IndexGenerator::getInstance()->~IndexGenerator(); -// GlobalSettings::getInstance()->forceSync(); + QBuffer buffer; + QDataStream out(&buffer); + if (m_sharedMemory->isAttached()) { + m_sharedMemory->detach(); + } + buffer.open(QBuffer::ReadWrite); + out << currentPath; + int size = buffer.size(); + if (!m_sharedMemory->create(size)) { + qDebug() << "Create sharedMemory Error: " << m_sharedMemory->errorString(); + } else { + m_sharedMemory->lock(); + char *to = static_cast(m_sharedMemory->data()); + const char *from = buffer.data().constData(); + memcpy(to, from, qMin(size, m_sharedMemory->size())); + m_sharedMemory->unlock(); + } + // GlobalSettings::getInstance()->forceSync(); ::_exit(0); } else { memset(buf, 0x00, BUF_LEN); @@ -373,6 +391,20 @@ void InotifyIndex::run() { } else if(pid > 0) { memset(buf, 0x00, BUF_LEN); waitpid(pid, NULL, 0); + if (!m_sharedMemory->attach()) { + qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString(); + } else { + QBuffer buffer; + QDataStream in(&buffer); + QMap pathMap; + m_sharedMemory->lock(); + buffer.setData(static_cast(m_sharedMemory->constData()), m_sharedMemory->size()); + buffer.open(QBuffer::ReadWrite); + in >> pathMap; + m_sharedMemory->unlock(); + m_sharedMemory->detach(); + currentPath = pathMap; + } --FileUtils::_index_status; } else { assert(false); diff --git a/libsearch/index/inotify-index.h b/libsearch/index/inotify-index.h index fe3dd7b..1b31f2a 100644 --- a/libsearch/index/inotify-index.h +++ b/libsearch/index/inotify-index.h @@ -22,6 +22,7 @@ #include #include +#include #include 
#include #include "index-generator.h" @@ -77,6 +78,7 @@ private: std::map::value_type("et", true), std::map::value_type("pdf", true) }; + QSharedMemory *m_sharedMemory = nullptr; }; } diff --git a/libsearch/index/inotify-watch.cpp b/libsearch/index/inotify-watch.cpp new file mode 100644 index 0000000..eb845a8 --- /dev/null +++ b/libsearch/index/inotify-watch.cpp @@ -0,0 +1,459 @@ +#include "inotify-watch.h" +#include +#include +#include +using namespace Zeeker; +static InotifyWatch* global_instance_InotifyWatch = nullptr; + +Zeeker::InotifyWatch *Zeeker::InotifyWatch::getInstance(const QString &path) +{ + if(!global_instance_InotifyWatch) { + global_instance_InotifyWatch = new InotifyWatch(path); + } + return global_instance_InotifyWatch; +} + +Zeeker::InotifyWatch::InotifyWatch(const QString &path): Traverse_BFS(path) +{ + qDebug() << "setInotifyMaxUserWatches start"; + UkuiSearchQDBus usQDBus; + usQDBus.setInotifyMaxUserWatches(); + qDebug() << "setInotifyMaxUserWatches end"; + m_sharedMemory = new QSharedMemory("ukui-search-shared-map", this); +} + +InotifyWatch::~InotifyWatch() +{ + if(m_notifier) + delete m_notifier; + m_notifier = nullptr; +} + +bool InotifyWatch::addWatch(const QString &path) +{ + int ret = inotify_add_watch(m_inotifyFd, path.toStdString().c_str(), (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY)); + if(ret == -1) { + qWarning() << "AddWatch error:" << path; + return false; + } + currentPath[ret] = path; +// qDebug() << "Watch: " << path << "ret: " << ret; + return true; +} + +bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase) +{ + inotify_rm_watch(m_inotifyFd, currentPath.key(path)); + + if(removeFromDatabase) { + for(QMap::Iterator i = currentPath.begin(); i != currentPath.end();) { + // qDebug() << i.value(); + // if(i.value().length() > path.length()) { + if(FileUtils::isOrUnder(i.value(), path)) { + qDebug() << "remove path: " << i.value(); + inotify_rm_watch(m_inotifyFd, 
currentPath.key(path)); + PendingFile f(i.value()); + f.setDeleted(); + f.setIsDir(); + PendingFileQueue::getInstance()->enqueue(f); + currentPath.erase(i++); + } else { + i++; + } + } + } else { + for(QMap::Iterator i = currentPath.begin(); i != currentPath.end();) { + // qDebug() << i.value(); + if(i.value().length() > path.length()) { + if(FileUtils::isOrUnder(i.value(), path)) { +// if(i.value().startsWith(path + "/")) { +// qDebug() << "remove path: " << i.value(); + inotify_rm_watch(m_inotifyFd, currentPath.key(path)); + currentPath.erase(i++); + } else { + i++; + } + } else { + i++; + } + } + } + currentPath.remove(currentPath.key(path)); + return true; +} + +void InotifyWatch::DoSomething(const QFileInfo &info) +{ + qDebug() << info.fileName() << "-------" << info.absoluteFilePath(); + if(info.isDir() && (!info.isSymLink())) { + this->addWatch(info.absoluteFilePath()); + } + PendingFile f(info.absoluteFilePath()); + if(info.isDir()) { + f.setIsDir(); + } + PendingFileQueue::getInstance()->enqueue(f); +} + +void InotifyWatch::firstTraverse() +{ + QQueue bfs; + bfs.enqueue(this->path); + QFileInfoList list; + QDir dir; + dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); + dir.setSorting(QDir::DirsFirst); + while(!bfs.empty()) { + dir.setPath(bfs.dequeue()); + list = dir.entryInfoList(); + for(auto i : list) { + if(i.isDir() && (!(i.isSymLink()))) { + this->addWatch(i.absoluteFilePath()); + bfs.enqueue(i.absoluteFilePath()); + } + } + } +} + +void InotifyWatch::stopWatch() +{ +// if(this->isRunning()) { +// this->quit(); +// if(m_notifier) +// delete m_notifier; +// m_notifier = nullptr; +// removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false); +// } + +// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); +} + +void InotifyWatch::run() +{ + m_inotifyFd = inotify_init(); + if (m_inotifyFd > 0) { + qDebug()<<"Inotify init success!"; + } else { + qWarning() << "Inotify init fail! 
Now try add inotify_user_instances."; + UkuiSearchQDBus usQDBus; + usQDBus.addInotifyUserInstances(128); + m_inotifyFd = inotify_init(); + if (m_inotifyFd > 0) { + qDebug()<<"Inotify init success!"; + } else { + printf("errno=%d\n",errno); + printf("Mesg:%s\n",strerror(errno)); + Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); + } + } + + this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation)); + this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation)); + this->firstTraverse(); + + int fifo_fd; + char buffer[2]; + memset(buffer, 0, sizeof(buffer)); + fifo_fd = open(UKUI_SEARCH_PIPE_PATH, O_RDWR); + if(fifo_fd == -1) { + qWarning() << "Open fifo error\n"; + assert(false); + } + int retval = read(fifo_fd, buffer, sizeof(buffer)); + if(retval == -1) { + qWarning() << "read error\n"; + assert(false); + } + qDebug("Read fifo[%s]", buffer); + + qDebug("Read data ok"); + close(fifo_fd); + if(buffer[0] & 0x1) { + qDebug("Data confirmed\n"); + } + unlink(UKUI_SEARCH_PIPE_PATH); + + while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { + fd_set fds; + FD_ZERO(&fds); + FD_SET(m_inotifyFd, &fds); + int rc; + rc = select(m_inotifyFd + 1, &fds, NULL, NULL, NULL); + if(rc > 0) { + int avail; + if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) { + qWarning() << "Did not receive an entire inotify event."; + return; + } + + char* buf = (char*)malloc(avail); + memset(buf, 0x00, avail); + + const ssize_t len = read(m_inotifyFd, buf, avail); + if(len != avail) { + qWarning()<<"read event error"; + // IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); + } + + int i = 0; + while (i < len) { + const struct inotify_event* event = (struct inotify_event*)&buf[i]; + if(event->name[0] != '.') { + // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; + // qDebug("mask:0x%x,",event->mask); + break; + } + i += sizeof(struct 
inotify_event) + event->len; + } + if(i < len ) { + qDebug() << "fork"; + slotEvent(buf, len); + free(buf); + } + } else if(rc < 0) { + // error + qWarning() << "select result < 0, error!"; + IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); + assert(false); + } + } + qDebug() << "Leave watch loop"; + if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { + IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); + removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false); + } + close(m_inotifyFd); +// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC); +// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read); +// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection); +// exec(); +} + +void InotifyWatch::slotEvent(char *buf, ssize_t len) +{ +// eventProcess(socket); + ++FileUtils::_index_status; + if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { + pid_t pid; + pid = fork(); + if(pid == 0) { + prctl(PR_SET_PDEATHSIG, SIGTERM); + prctl(PR_SET_NAME, "inotify-index"); + this->eventProcess(buf, len); + fd_set read_fds; + int rc; + timeval* read_timeout = (timeval*)malloc(sizeof(timeval)); + read_timeout->tv_sec = 40; + read_timeout->tv_usec = 0; + for(;;) { + FD_ZERO(&read_fds); + FD_SET(m_inotifyFd, &read_fds); + rc = select(m_inotifyFd + 1, &read_fds, NULL, NULL, read_timeout); + if(rc < 0) { + // error + qWarning() << "fork select result < 0, error!"; + IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); + assert(false); + } else if(rc == 0) { + qDebug() << "select timeout!"; + ::free(read_timeout); + + QBuffer buffer; + QDataStream out(&buffer); + if (m_sharedMemory->isAttached()) { + m_sharedMemory->detach(); + } + buffer.open(QBuffer::ReadWrite); + out << currentPath; + int size = buffer.size(); + if (!m_sharedMemory->create(size)) { + qDebug() << "Create sharedMemory Error: " << 
m_sharedMemory->errorString(); + } else { + m_sharedMemory->lock(); + char *to = static_cast(m_sharedMemory->data()); + const char *from = buffer.data().constData(); + memcpy(to, from, qMin(size, m_sharedMemory->size())); + m_sharedMemory->unlock(); + } + // GlobalSettings::getInstance()->forceSync(); + PendingFileQueue::getInstance()->forceFinish(); + PendingFileQueue::getInstance()->~PendingFileQueue(); + ::_exit(0); + } else { +// qDebug() << "Select remain:" <tv_sec; + this->eventProcess(m_inotifyFd); +// qDebug() << "Select remain:" <tv_sec; + } + } + } else if(pid > 0) { + waitpid(pid, NULL, 0); + if (!m_sharedMemory->attach()) { + qDebug() << "SharedMemory attach Error: " << m_sharedMemory->errorString(); + } else { + QBuffer buffer; + QDataStream in(&buffer); + QMap pathMap; + m_sharedMemory->lock(); + buffer.setData(static_cast(m_sharedMemory->constData()), m_sharedMemory->size()); + buffer.open(QBuffer::ReadWrite); + in >> pathMap; + m_sharedMemory->unlock(); + m_sharedMemory->detach(); + currentPath = pathMap; + } + --FileUtils::_index_status; + } else { + assert(false); + } + } +} + +char * InotifyWatch::filter() +{ + int avail; + if (ioctl(m_inotifyFd, FIONREAD, &avail) == EINVAL) { + qWarning() << "Did not receive an entire inotify event."; + return NULL; + } + + char* buffer = (char*)malloc(avail); + memset(buffer, 0x00, avail); + + const int len = read(m_inotifyFd, buffer, avail); + if(len != avail) { + qWarning()<<"read event error"; +// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); + } + + int i = 0; + while (i < len) { + const struct inotify_event* event = (struct inotify_event*)&buffer[i]; + if(event->name[0] == '.') { + // qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; + // qDebug("mask:0x%x,",event->mask); + i += sizeof(struct inotify_event) + event->len; + return buffer; + } + } + free(buffer); + return NULL; +} +void 
InotifyWatch::eventProcess(int socket) +{ +// qDebug()<< "Enter eventProcess!"; + int avail; + if (ioctl(socket, FIONREAD, &avail) == EINVAL) { + qWarning() << "Did not receive an entire inotify event."; + return; + } + + char* buffer = (char*)malloc(avail); + memset(buffer, 0x00, avail); + + const ssize_t len = read(socket, buffer, avail); + if(len != avail) { + qWarning()<<"read event error"; +// IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1"); + } + int i = 0; + while (i < len) { + const struct inotify_event* event = (struct inotify_event*)&buffer[i]; + if(event->name[0] != '.') { +// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; +// qDebug("mask:0x%x,",event->mask); + break; + } + i += sizeof(struct inotify_event) + event->len; + } + if(i >= len) { + qDebug() << "There is nothing to do!"; + return; + } + eventProcess(buffer, len); + free(buffer); +} + +void InotifyWatch::eventProcess(const char *buffer, ssize_t len) +{ +// qDebug()<< "Begin eventProcess! len:" << len; + + char * p = const_cast(buffer); + while (p < buffer + len) { + const struct inotify_event* event = reinterpret_cast(p); +// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask; +// qDebug("mask:0x%x,",event->mask); + if(event->name[0] != '.') { + QString path = currentPath[event->wd] + '/' + event->name; + //Create top dir first, traverse it last. 
+ if(event->mask & IN_CREATE) { +// qDebug() << "IN_CREATE"; + PendingFile f(path); + if(event->mask & IN_ISDIR) { + f.setIsDir(); + } + PendingFileQueue::getInstance(this)->enqueue(f); + + if(event->mask & IN_ISDIR) { + if(!QFileInfo(path).isSymLink()){ + addWatch(path); + setPath(path); + Traverse(); + } + } + goto next; + + } + + if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) { + qDebug() << "IN_DELETE or IN_MOVED_FROM"; + if(event->mask & IN_ISDIR) { + removeWatch(path); + } else { + PendingFile f(path); + f.setDeleted(); + PendingFileQueue::getInstance()->enqueue(f); + } + p += sizeof(struct inotify_event) + event->len; + continue; + } + if(event->mask & IN_MODIFY) { +// qDebug() << "IN_MODIFY"; + if(!(event->mask & IN_ISDIR)) { + PendingFileQueue::getInstance()->enqueue(PendingFile(path)); + } + goto next; + + } + + if(event->mask & IN_MOVED_TO) { + qDebug() << "IN_MOVED_TO"; + if(event->mask & IN_ISDIR) { + removeWatch(path); + + PendingFile f(path); + f.setIsDir(); + PendingFileQueue::getInstance()->enqueue(f); + + if(!QFileInfo(path).isSymLink()){ + addWatch(path); + setPath(path); + Traverse(); + } + } else { + //Enqueue a deleted file to merge. + PendingFile f(path); + f.setDeleted(); + PendingFileQueue::getInstance()->enqueue(f); + //Enqueue a new one. 
+ PendingFileQueue::getInstance()->enqueue(PendingFile(path)); + } + goto next; + } + } +next: + p += sizeof(struct inotify_event) + event->len; + } +// qDebug()<< "Finish eventProcess!"; +} + + diff --git a/libsearch/index/inotify-watch.h b/libsearch/index/inotify-watch.h new file mode 100644 index 0000000..eb21587 --- /dev/null +++ b/libsearch/index/inotify-watch.h @@ -0,0 +1,52 @@ +#ifndef INOTIFYWATCH_H +#define INOTIFYWATCH_H + +#include +#include +#include +#include +#include +#include + +#include "traverse_bfs.h" +#include "ukui-search-qdbus.h" +#include "index-status-recorder.h" +#include "file-utils.h" +#include "first-index.h" +#include "pending-file-queue.h" +#include "common.h" +namespace Zeeker { +class InotifyWatch : public QThread, public Traverse_BFS +{ + Q_OBJECT +public: + static InotifyWatch* getInstance(const QString& path); + + bool addWatch(const QString &path); + bool removeWatch(const QString &path, bool removeFromDatabase = true); + virtual void DoSomething(const QFileInfo &info) final; + + void firstTraverse(); + void stopWatch(); +protected: + void run() override; + +private Q_SLOTS: + void slotEvent(char *buf, ssize_t len); +private: + explicit InotifyWatch(const QString& path); + ~InotifyWatch(); + char * filter(); + void eventProcess(int socket); + void eventProcess(const char *buffer, ssize_t len); + + int m_inotifyFd; + QSocketNotifier* m_notifier = nullptr; + QSharedMemory *m_sharedMemory = nullptr; + QMap currentPath; + QMutex m_mutex; + + +}; +} +#endif // INOTIFYWATCH_H diff --git a/libsearch/index/pending-file-queue.cpp b/libsearch/index/pending-file-queue.cpp new file mode 100644 index 0000000..47a93c9 --- /dev/null +++ b/libsearch/index/pending-file-queue.cpp @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2021, KylinSoft Co., Ltd. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: zhangpengfei + * + */ +#include "pending-file-queue.h" +#include "file-utils.h" +#include +using namespace Zeeker; +static PendingFileQueue *global_instance_pending_file_queue = nullptr; +PendingFileQueue::PendingFileQueue(QObject *parent) : QThread(parent) +{ + this->start(); + + m_cacheTimer = new QTimer; + m_minProcessTimer = new QTimer; + + m_cacheTimer->setInterval(10*1000); + m_cacheTimer->setSingleShot(true); + m_minProcessTimer->setInterval(500); + m_minProcessTimer->setSingleShot(true); + + m_cacheTimer->moveToThread(this); + m_minProcessTimer->moveToThread(this); + +// connect(this, &PendingFileQueue::cacheTimerStart, m_cacheTimer, f, Qt::DirectConnection); +// connect(this, &PendingFileQueue::minProcessTimerStart, m_minProcessTimer, f,Qt::DirectConnection); + connect(this, SIGNAL(cacheTimerStart()), m_cacheTimer, SLOT(start())); + connect(this, SIGNAL(minProcessTimerStart()), m_minProcessTimer, SLOT(start())); + connect(this, &PendingFileQueue::timerStop, m_cacheTimer, &QTimer::stop); + connect(this, &PendingFileQueue::timerStop, m_minProcessTimer, &QTimer::stop); + + connect(m_cacheTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection); + connect(m_minProcessTimer, &QTimer::timeout, this, &PendingFileQueue::processCache, Qt::DirectConnection); +} + +PendingFileQueue 
*PendingFileQueue::getInstance(QObject *parent) +{ + if (!global_instance_pending_file_queue) { + global_instance_pending_file_queue = new PendingFileQueue(parent); + } + return global_instance_pending_file_queue; +} + +PendingFileQueue::~PendingFileQueue() +{ + if(m_cacheTimer) { + delete m_cacheTimer; + m_cacheTimer = nullptr; + } + if(m_minProcessTimer) { + delete m_minProcessTimer; + m_minProcessTimer = nullptr; + } + + IndexGenerator::getInstance()->~IndexGenerator(); +} + +void PendingFileQueue::forceFinish() +{ + QThread::msleep(600); + Q_EMIT timerStop(); + this->quit(); + this->wait(); +} +void PendingFileQueue::enqueue(const PendingFile &file) +{ +// qDebug() << "enqueuq file: " << file.path(); + m_mutex.lock(); + m_enqueuetimes++; + if(m_cache.isEmpty()) { + IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0"); + } + // Remove all indexs of files under a dir which is to about be deleted,but keep delete signals. + // Because our datebase need to delete those indexs one by one. 
+ if(file.shouldRemoveIndex() && file.isDir()) { + const auto keepFile = [&file](const PendingFile& pending) { + return (!FileUtils::isOrUnder(pending.path(), file.path()) || pending.shouldRemoveIndex()); + }; + const auto end = m_cache.end(); + const auto droppedFilesBegin = std::stable_partition(m_cache.begin(), end, keepFile); + m_cache.erase(droppedFilesBegin, end); + } + + if(file.shouldRemoveIndex()) { + m_cache.removeOne(file); + } + int i = m_cache.indexOf(file); + if (i == -1) { +// qDebug() << "insert file" << file.path() << file.shouldRemoveIndex(); + m_cache << file; + } else { +// qDebug() << "merge file" << file.path() << file.shouldRemoveIndex(); + m_cache[i].merge(file); + } + + if(!m_cacheTimer->isActive()) { +// qDebug()<<"m_cacheTimer-----start!!"; +// m_cacheTimer->start(); + Q_EMIT cacheTimerStart(); + } + Q_EMIT minProcessTimerStart(); +// m_minProcessTimer->start(); +// qDebug()<<"m_minProcessTimer-----start!!"; + m_mutex.unlock(); +// qDebug() << "Current cache-------------"; +// for(PendingFile i : m_cache) { +// qDebug() << "|" << i.path(); +// qDebug() << "|" <updateIndex(&m_pendingFiles); + m_mutex.lock(); + if(m_cache.isEmpty()) { + IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2"); + } + m_mutex.unlock(); + m_pendingFiles.clear(); + m_pendingFiles.squeeze(); + malloc_trim(0); + qDebug()<< "Finish processCache!"; + return; +} diff --git a/libsearch/index/pending-file-queue.h b/libsearch/index/pending-file-queue.h new file mode 100644 index 0000000..a42adf7 --- /dev/null +++ b/libsearch/index/pending-file-queue.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2021, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: zhangpengfei + * + */ +#ifndef PENDINGFILEQUEUE_H +#define PENDINGFILEQUEUE_H + +#include +#include +#include +#include +#include +#include "pending-file.h" +#include "index-generator.h" + +namespace Zeeker { +class PendingFileQueue : public QThread +{ + Q_OBJECT +public: + static PendingFileQueue *getInstance(QObject *parent = nullptr); + + ~PendingFileQueue(); + //This method will block until current cache has been processed. + //Do not do enqueue operation in other thread while this method is running. + void forceFinish(); + void enqueue(const PendingFile& file); + QTimer *m_cacheTimer = nullptr; + QTimer *m_minProcessTimer = nullptr; + +protected: + void run() override; +Q_SIGNALS: + void cacheTimerStart(); + void minProcessTimerStart(); + void timerStop(); +private: + void processCache(); + explicit PendingFileQueue(QObject *parent = nullptr); + + QVector m_cache; + QVector m_pendingFiles; + QMutex m_mutex; + QMutex m_timeoutMutex; + + QThread *m_timerThread = nullptr; + bool m_timeout = false; + int m_enqueuetimes = 0; + +}; +} +#endif // PENDINGFILEQUEUE_H diff --git a/libsearch/index/pending-file.cpp b/libsearch/index/pending-file.cpp new file mode 100644 index 0000000..aa30720 --- /dev/null +++ b/libsearch/index/pending-file.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2021, KylinSoft Co., Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: zhangpengfei
+ *
+ */
+#include "pending-file.h"
+using namespace Zeeker;
+PendingFile::PendingFile(const QString &path)
+    : m_path(path)
+    , m_deleted(false)
+    , m_modified(false)
+    , m_isDir(false)
+{
+    // The path is stored exactly as given; all flags start cleared.
+}
+// Returns the path this pending entry refers to.
+QString PendingFile::path() const
+{
+    return m_path;
+}
+// Stores path with one trailing '/' removed; a lone "/" is kept unchanged.
+void PendingFile::setPath(const QString& path)
+{
+    if (path.length() > 1 && path.endsWith(QLatin1Char('/'))) {
+        m_path = path.mid(0, path.length() - 1); // length of the NEW path, not the old m_path
+        return;
+    }
+    m_path = path;
+}
+
+//bool PendingFile::isNewFile() const
+//{
+//    return m_created;
+//}
+
+//bool PendingFile::shouldIndexContents() const
+//{
+//    if (m_created || m_modified) {
+//        return true;
+//    }
+//    return false;
+//}
+// Returns the is-directory flag.
+bool PendingFile::isDir() const
+{
+    return m_isDir;
+}
+// Returns the deleted flag.
+bool PendingFile::shouldRemoveIndex() const
+{
+    return m_deleted;
+}
+// Overwrites (does not OR) the modified/deleted flags with those of file; path and is-dir flag are untouched.
+void PendingFile::merge(const PendingFile& file)
+{
+//    m_created |= file.m_created;
+    m_modified = file.m_modified;
+    m_deleted = file.m_deleted;
+}
+// Writes the current flag values to the debug log.
+void PendingFile::printFlags() const
+{
+//    qDebug() << "Created:" << m_created;
+    qDebug() << "Deleted:" << m_deleted;
+    qDebug() << "Modified:" << m_modified;
+    qDebug() << "Is dir:" << m_isDir;
+}
diff --git a/libsearch/index/pending-file.h b/libsearch/index/pending-file.h
new file mode 100644
index 0000000..41555c6
--- /dev/null
+++ b/libsearch/index/pending-file.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2021, KylinSoft Co., Ltd.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Authors: zhangpengfei + * + */ +#ifndef PENDINGFILE_H +#define PENDINGFILE_H + +#include +#include +namespace Zeeker { +/** + * Represents a file/folder which needs to be indexed. + */ +class PendingFile +{ +public: + explicit PendingFile(const QString& path = QString()); + + QString path() const; + void setPath(const QString& path); + void setIsDir(){ m_isDir = true; } + void setModified() { m_modified = true; } +// void setCreated() { m_created = true; } + void setDeleted() { m_deleted = true; } + bool shouldRemoveIndex() const; +// bool shouldIndexContents() const; + bool isDir() const; + + bool operator == (const PendingFile& rhs) const { + return (m_path == rhs.m_path); + } + + /** + * Takes a PendingFile \p file and merges its flags into + * the current PendingFile + */ + void merge(const PendingFile& file); + +private: + QString m_path; + +// bool m_created : 1; + bool m_deleted : 1; + bool m_modified : 1; + bool m_isDir : 1; + + void printFlags() const; +}; +} +#endif // PENDINGFILE_H diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index ce75385..78f2bb3 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -27,7 +27,7 @@ QMutex SearchManager::m_mutex1; QMutex SearchManager::m_mutex2; QMutex SearchManager::m_mutex3; SearchManager::SearchManager(QObject 
*parent) : QObject(parent) { - m_pool.setMaxThreadCount(2); + m_pool.setMaxThreadCount(3); m_pool.setExpiryTimeout(1000); } @@ -56,39 +56,54 @@ void SearchManager::onKeywordSearch(QString keyword, QQueue *searchResu ++uniqueSymbol3; m_mutex3.unlock(); - if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { - DirectSearch *directSearch; - directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1); - m_pool.start(directSearch); - } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { - FileSearch *filesearch; - filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5); - m_pool.start(filesearch); +// if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { +// DirectSearch *directSearch; +// directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1); +// m_pool.start(directSearch); +// } else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) { +// FileSearch *filesearch; +// filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5); +// m_pool.start(filesearch); - FileSearch *dirsearch; - dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5); - m_pool.start(dirsearch); +// FileSearch *dirsearch; +// dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5); +// m_pool.start(dirsearch); - FileContentSearch *contentSearch; - contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5); - m_pool.start(contentSearch); - } else { - qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast(FileUtils::searchMethod); - } +// FileContentSearch *contentSearch; +// contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5); +// m_pool.start(contentSearch); +// } else { +// qWarning() << "Unknown search method! 
FileUtils::searchMethod: " << static_cast(FileUtils::searchMethod); +// } return; } bool SearchManager::isBlocked(QString &path) { QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); for(QString i : blockList) { - if(path.startsWith(i.prepend("/"))) + if(FileUtils::isOrUnder(path, i)) return true; } return false; } -FileSearch::FileSearch(QQueue *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) { +bool SearchManager::creatResultInfo(SearchPluginIface::ResultInfo &ri, QString path) +{ + QFileInfo info(path); + if(!info.exists()) { + return false; + } + ri.icon = FileUtils::getFileIcon(QUrl::fromLocalFile(path).toString()); + ri.name = info.fileName(); + ri.description = QVector() \ + << SearchPluginIface::DescriptionInfo{tr("Path:"), path} \ + << SearchPluginIface::DescriptionInfo{tr("Modified time:"), info.lastModified().toString("yyyy/MM/dd hh:mm:ss")}; + ri.actionKey = path; + ri.type = 0; + return true; +} +FileSearch::FileSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) { this->setAutoDelete(true); m_search_result = searchResult; m_uniqueSymbol = uniqueSymbol; @@ -178,18 +193,13 @@ int FileSearch::getResult(Xapian::MSet &result) { if(SearchManager::isBlocked(path)) { continue; } - - QFileInfo info(path); - - if(!info.exists()) { -// pathTobeDelete->append(QString::fromStdString(data)); - qDebug() << path << "is not exist!!"; - } else { + SearchPluginIface::ResultInfo ri; + if(SearchManager::creatResultInfo(ri, path)) { switch(m_value.toInt()) { case 1: SearchManager::m_mutex1.lock(); if(m_uniqueSymbol == SearchManager::uniqueSymbol2) { - m_search_result->enqueue(path); + m_search_result->enqueue(ri); SearchManager::m_mutex1.unlock(); } else { SearchManager::m_mutex1.unlock(); @@ -200,7 +210,7 @@ int FileSearch::getResult(Xapian::MSet &result) { case 0: SearchManager::m_mutex2.lock(); if(m_uniqueSymbol == 
SearchManager::uniqueSymbol1) { - m_search_result->enqueue(path); + m_search_result->enqueue(ri); SearchManager::m_mutex2.unlock(); } else { SearchManager::m_mutex2.unlock(); @@ -210,8 +220,8 @@ int FileSearch::getResult(Xapian::MSet &result) { default: break; } - // searchResult.append(path); } + // searchResult.append(path); qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent; } // if(!pathTobeDelete->isEmpty()) @@ -219,7 +229,7 @@ int FileSearch::getResult(Xapian::MSet &result) { return 0; } -FileContentSearch::FileContentSearch(QQueue> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) { +FileContentSearch::FileContentSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) { this->setAutoDelete(true); m_search_result = searchResult; m_uniqueSymbol = uniqueSymbol; @@ -280,29 +290,15 @@ int FileContentSearch::keywordSearchContent() { words.append(sKeyWord.at(i).word).append(" "); } - Xapian::Query query = qp.parse_query(words); -// Xapian::Query query = qp.parse_query(keyword.toStdString()); - - - -// QVector sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword); -// //Creat a query -// std::string words; -// for(int i=0;i v; - // for(int i=0;i v; + for(int i=0; iappend(QString::fromStdString(data)); - qDebug() << path << "is not exist!!"; + SearchPluginIface::ResultInfo ri; + if(!SearchManager::creatResultInfo(ri, path)) { continue; } // Construct snippets containing keyword. 
- QStringList snippets; +// QStringList snippets; // snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400))); // qWarning()<enqueue(qMakePair(path, snippets)); + m_search_result->enqueue(ri); SearchManager::m_mutex3.unlock(); - snippets.clear(); - QStringList().swap(snippets); +// snippets.clear(); +// QStringList().swap(snippets); } else { SearchManager::m_mutex3.unlock(); return -1; @@ -403,12 +399,12 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) { return 0; } -DirectSearch::DirectSearch(QString keyword, QQueue *searchResultFile, QQueue *searchResultDir, size_t uniqueSymbol) { +DirectSearch::DirectSearch(QString keyword, DataQueue *searchResult, QString value, size_t uniqueSymbol) { this->setAutoDelete(true); m_keyword = keyword; - m_searchResultFile = searchResultFile; - m_searchResultDir = searchResultDir; + m_searchResult = searchResult; m_uniqueSymbol = uniqueSymbol; + m_value = value; } void DirectSearch::run() { @@ -417,8 +413,13 @@ void DirectSearch::run() { QFileInfoList list; QDir dir; // QDir::Hidden - dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); - dir.setSorting(QDir::DirsFirst); + if(m_value == DIR_SEARCH_VALUE) { + dir.setFilter(QDir::Dirs | QDir::NoDotAndDotDot); + } else { + dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot); + dir.setSorting(QDir::DirsFirst); + } + QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); while(!bfs.empty()) { dir.setPath(bfs.dequeue()); @@ -428,7 +429,7 @@ void DirectSearch::run() { bool findIndex = false; for (QString j : blockList) { - if (i.absoluteFilePath().startsWith(j.prepend("/"))) { + if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) { findIndex = true; break; } @@ -442,26 +443,22 @@ void DirectSearch::run() { bfs.enqueue(i.absoluteFilePath()); } if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) { - SearchManager::m_mutex1.lock(); // qWarning() << i.fileName() << m_keyword; - if(m_uniqueSymbol == 
SearchManager::uniqueSymbol1) { - // TODO - if(i.isDir() && m_searchResultDir->length() < 51) { - m_searchResultDir->enqueue(i.absoluteFilePath()); - } else if(m_searchResultFile->length() < 51) { - m_searchResultFile->enqueue(i.absoluteFilePath()); - } - SearchManager::m_mutex1.unlock(); - if(m_searchResultDir->length() > 49 && m_searchResultFile->length() > 49) { - return; - } - } else { - // TODO - // More suitable method? - m_searchResultFile->clear(); - m_searchResultDir->clear(); - SearchManager::m_mutex1.unlock(); + if(m_searchResult->length() > 49) return; + if((i.isDir() && m_value == DIR_SEARCH_VALUE) || (i.isFile() && m_value == FILE_SEARCH_VALUE)) { + SearchPluginIface::ResultInfo ri; + if(SearchManager::creatResultInfo(ri,i.absoluteFilePath())) { + SearchManager::m_mutex1.lock(); + if(m_uniqueSymbol == SearchManager::uniqueSymbol1) { + m_searchResult->enqueue(ri); + SearchManager::m_mutex1.unlock(); + } else { + SearchManager::m_mutex1.unlock(); + return; + } + + } } } } diff --git a/libsearch/index/search-manager.h b/libsearch/index/search-manager.h index c715306..e458a47 100644 --- a/libsearch/index/search-manager.h +++ b/libsearch/index/search-manager.h @@ -37,19 +37,19 @@ #include #include +#include "search-plugin-iface.h" #include "file-utils.h" #include "global-settings.h" #include "chinese-segmentation.h" - +#include "common.h" #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() - namespace Zeeker { - class LIBSEARCH_EXPORT SearchManager : public QObject { friend class FileSearch; friend class FileContentSearch; + friend class DirectSearch; Q_OBJECT public: explicit SearchManager(QObject *parent = nullptr); @@ -72,33 +72,15 @@ Q_SIGNALS: void resultDir(QQueue *); void resultContent(QQueue> *); private: 
-// int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20); -// int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20); - - /** - * @brief SearchManager::creatQueryForFileSearch - * This part shall be optimized frequently to provide a more stable search function. - * @param keyword - * @param db - * @return Xapian::Query - */ -// Xapian::Query creatQueryForFileSearch(QString keyword, Xapian::Database &db); -// Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db); - -// int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value); -// int getContentResult(size_t uniqueSymbol, Xapian::MSet &result,std::string &keyWord); - static bool isBlocked(QString &path); + static bool creatResultInfo(Zeeker::SearchPluginIface::ResultInfo &ri, QString path); -// QQueue *m_search_result_file = nullptr; -// QQueue *m_search_result_dir = nullptr; -// QQueue> *m_search_result_content = nullptr; QThreadPool m_pool; }; class FileSearch : public QRunnable { public: - explicit FileSearch(QQueue *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20); + explicit FileSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20); ~FileSearch(); protected: void run(); @@ -107,7 +89,7 @@ private: Xapian::Query creatQueryForFileSearch(Xapian::Database &db); int getResult(Xapian::MSet &result); - QQueue *m_search_result = nullptr; + DataQueue *m_search_result = nullptr; QString m_value; unsigned m_slot = 1; size_t m_uniqueSymbol; @@ -118,7 +100,7 @@ private: class FileContentSearch : public QRunnable { public: - explicit FileContentSearch(QQueue> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20); + explicit FileContentSearch(DataQueue *searchResult, size_t uniqueSymbol, QString keyword, int begin 
= 0, int num = 20); ~FileContentSearch(); protected: void run(); @@ -126,7 +108,7 @@ private: int keywordSearchContent(); int getResult(Xapian::MSet &result, std::string &keyWord); - QQueue> *m_search_result = nullptr; + DataQueue *m_search_result = nullptr; size_t m_uniqueSymbol; QString m_keyword; int m_begin = 0; @@ -135,14 +117,14 @@ private: class DirectSearch : public QRunnable { public: - explicit DirectSearch(QString keyword, QQueue *searchResultFile, QQueue *searchResultDir, size_t uniqueSymbol); + explicit DirectSearch(QString keyword, DataQueue *searchResult, QString value, size_t uniqueSymbol); protected: void run(); private: QString m_keyword; - QQueue* m_searchResultFile = nullptr; - QQueue* m_searchResultDir = nullptr; + DataQueue* m_searchResult = nullptr; size_t m_uniqueSymbol; + QString m_value; }; } diff --git a/libsearch/index/searchmethodmanager.cpp b/libsearch/index/searchmethodmanager.cpp index 0726abd..bfdc7ee 100644 --- a/libsearch/index/searchmethodmanager.cpp +++ b/libsearch/index/searchmethodmanager.cpp @@ -1,25 +1,49 @@ #include "searchmethodmanager.h" using namespace Zeeker; +SearchMethodManager::SearchMethodManager() +{ + m_iw = InotifyWatch::getInstance(HOME_PATH); +} + void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) { qWarning() << "searchMethod start: " << static_cast(sm); if(FileUtils::SearchMethod::INDEXSEARCH == sm || FileUtils::SearchMethod::DIRECTSEARCH == sm) { FileUtils::searchMethod = sm; } else { - printf("enum class error!!!\n"); qWarning("enum class error!!!\n"); } if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) { + + // Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running. 
+ QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search"); + if(!fifoDir.exists()) + qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath()); + + unlink(UKUI_SEARCH_PIPE_PATH); + int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777); + if(retval == -1) { + qCritical() << "creat fifo error!!"; + syslog(LOG_ERR, "creat fifo error!!\n"); + assert(false); + return; + } + qDebug() << "create fifo success\n"; qWarning() << "start first index"; -// m_fi = FirstIndex("/home/zhangzihao/Desktop"); m_fi.start(); qWarning() << "start inotify index"; // InotifyIndex ii("/home"); // ii.start(); - this->m_ii = InotifyIndex::getInstance("/home"); - if(!this->m_ii->isRunning()) { - this->m_ii->start(); +// this->m_ii = InotifyIndex::getInstance("/home"); +// if(!this->m_ii->isRunning()) { +// this->m_ii->start(); +// } + if(!this->m_iw->isRunning()) { + this->m_iw->start(); } qDebug() << "Search method has been set to INDEXSEARCH"; } + if(FileUtils::SearchMethod::DIRECTSEARCH == sm) { + m_iw->stopWatch(); + } qWarning() << "searchMethod end: " << static_cast(FileUtils::searchMethod); } diff --git a/libsearch/index/searchmethodmanager.h b/libsearch/index/searchmethodmanager.h index 1c842ca..f95421b 100644 --- a/libsearch/index/searchmethodmanager.h +++ b/libsearch/index/searchmethodmanager.h @@ -2,15 +2,17 @@ #define SEARCHMETHODMANAGER_H #include "first-index.h" -#include "inotify-index.h" +//#include "inotify-index.h" +#include "inotify-watch.h" namespace Zeeker { class SearchMethodManager { public: - SearchMethodManager() = default; + SearchMethodManager(); void searchMethod(FileUtils::SearchMethod sm); private: FirstIndex m_fi; - InotifyIndex* m_ii; +// InotifyIndex* m_ii; + InotifyWatch *m_iw = nullptr; }; } diff --git a/libsearch/index/ukui-search-qdbus.cpp b/libsearch/index/ukui-search-qdbus.cpp index b03f340..b39f4da 100644 --- a/libsearch/index/ukui-search-qdbus.cpp +++ b/libsearch/index/ukui-search-qdbus.cpp @@ -42,5 +42,15 @@ void 
UkuiSearchQDBus::setInotifyMaxUserWatches() { // sysctl this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2"); // /etc/sysctl.conf -// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); + // this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); +} + +int UkuiSearchQDBus::addInotifyUserInstances(int addNum) +{ + QDBusReply reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum); + if(reply.isValid()) { + qDebug() << "Set inotify_max_user_instances to" << reply.value(); + } else { + qWarning() << "Call AddInotifyMaxUserInstance failed!"; + } } diff --git a/libsearch/index/ukui-search-qdbus.h b/libsearch/index/ukui-search-qdbus.h index d316171..898dc53 100644 --- a/libsearch/index/ukui-search-qdbus.h +++ b/libsearch/index/ukui-search-qdbus.h @@ -21,12 +21,14 @@ #define UKUISEARCHQDBUS_H #include +#include namespace Zeeker { class UkuiSearchQDBus { public: UkuiSearchQDBus(); ~UkuiSearchQDBus(); void setInotifyMaxUserWatches(); + int addInotifyUserInstances(int addNum); private: QDBusInterface* tmpSystemQDBusInterface; }; diff --git a/libsearch/libsearch.h b/libsearch/libsearch.h index 0fc1e02..5c0bfa7 100644 --- a/libsearch/libsearch.h +++ b/libsearch/libsearch.h @@ -26,12 +26,15 @@ #include "file-utils.h" #include "global-settings.h" +#include "plugininterface/search-plugin-iface.h" +#include "plugininterface/data-queue.h" #include "index/searchmethodmanager.h" #include "index/first-index.h" #include "index/ukui-search-qdbus.h" #include "index/inotify-index.h" #include "index/search-manager.h" + namespace Zeeker { //class LIBSEARCH_EXPORT GlobalSearch { //public: diff --git a/libsearch/libsearch.pro b/libsearch/libsearch.pro index 4b85b0d..d8b8ac9 100644 --- a/libsearch/libsearch.pro +++ b/libsearch/libsearch.pro @@ -24,16 +24,17 @@ DEFINES += PLUGIN_INSTALL_DIRS='\\"$${PLUGIN_INSTALL_DIRS}\\"' # In order to do so, uncomment the following line. 
# You can also select to disable deprecated APIs only up to a certain version of Qt. #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 - +include(pluginmanage/plugin-manager.pri) +include(plugininterface/plugin-interface.pri) include(index/index.pri) include(parser/parser.pri)) include(appsearch/appsearch.pri) include(settingsearch/settingsearch.pri)) -include(plugininterface/plugin-interface.pri) -include(pluginmanage/plugin-manager.pri) + + LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation -LIBS += -lxapian -lquazip5 -luchardet +LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc SOURCES += \ file-utils.cpp \ @@ -52,13 +53,13 @@ HEADERS += \ RESOURCES += \ resource1.qrc \ -#TRANSLATIONS += \ -# ../translations/libsearch/libukui-search_zh_CN.ts +TRANSLATIONS += \ + ../translations/libukui-search/libukui-search_zh_CN.ts -#qm_files.path = /usr/share/ukui-search/translations/libsearch/ -#qm_files.files = $$OUT_PWD/.qm/*.qm +qm_files.path = /usr/share/ukui-search/translations/ +qm_files.files = $$OUT_PWD/.qm/*.qm -#INSTALLS += qm_files +INSTALLS += qm_files # Default rules for deployment. 
@@ -67,11 +68,9 @@ unix { INSTALLS += target header.path = /usr/include/ukui-search - header.files += *.h index/*.h appsearch/*.h settingsearch/*.h + header.files += *.h index/*.h appsearch/*.h settingsearch/*.h plugininterface/*.h INSTALLS += header } - - INCLUDEPATH += $$PWD/../libchinese-segmentation DEPENDPATH += $$PWD/../libchinese-segmentation diff --git a/libsearch/libukui-search-headers.pri b/libsearch/libukui-search-headers.pri new file mode 100644 index 0000000..5205df9 --- /dev/null +++ b/libsearch/libukui-search-headers.pri @@ -0,0 +1,8 @@ +INCLUDEPATH += $$PWD +INCLUDEPATH += $$PWD/plugininterface +INCLUDEPATH += $$PWD/index +INCLUDEPATH += $$PWD/parser +INCLUDEPATH += $$PWD/pluginmanage +INCLUDEPATH += $$PWD/settingsearch +INCLUDEPATH += $$PWD/appsearch + diff --git a/libsearch/parser/binary-parser.cpp b/libsearch/parser/binary-parser.cpp index 0f927c5..59e9ef4 100644 --- a/libsearch/parser/binary-parser.cpp +++ b/libsearch/parser/binary-parser.cpp @@ -4963,7 +4963,7 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS, if(bUsesUnicode) { ushort* usAucData = (ushort*)ptaucBytes; - content.append(QString::fromUtf16(usAucData).replace("\r", "")); + content.append(QString::fromUtf16(usAucData).replace("\n", "").replace("\r", " ")); usAucData = (ushort*)xfree((void*)usAucData); ptaucBytes = NULL; if(content.length() >= 682666) //20480000/3 @@ -5066,7 +5066,7 @@ int KBinaryParser:: readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info, } else { ushort* usData = (ushort*)chData; - content.append(QString::fromUtf16(usData).replace("\r", "")); + content.append(QString::fromUtf16(usData).replace("\n", "").replace("\r", " ")); usData = (ushort*)xfree((void*)usData); chData = NULL; if(content.length() >= 682666) //20480000/3 @@ -5131,7 +5131,7 @@ ULONG KBinaryParser::readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* au return -1; ushort* usData = (ushort*)chData; - content.append(QString::fromUtf16(usData).replace("\r", "")); + 
content.append(QString::fromUtf16(usData).replace("\n", "").replace("\r", " ")); usData = (ushort*)xfree((void*)usData); chData = NULL; @@ -5205,8 +5205,10 @@ int KBinaryParser::InitDocOle(FILE* pFile, long lFilesize, QString &content) { for(iIndex = 0, ulTmp = ulSbdStartblock; iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN; iIndex++, ulTmp = aulBBD[ulTmp]) { - if(ulTmp >= (ULONG)tBBDLen) + if(ulTmp >= (ULONG)tBBDLen) { qWarning("The Big Block Depot is damaged"); + return -1; + } aulSbdList[iIndex] = ulTmp; } @@ -5349,7 +5351,10 @@ bool KBinaryParser::RunParser(QString strFile, QString &content) { (void)fclose(pFile); return false; } - InitDocOle(pFile, lFileSize, content); + // If InitDocOle failed, -1 will be returned. + if(InitDocOle(pFile, lFileSize, content)) { + qWarning() << "InitDocOle failed!" << strFile; + } fclose(pFile); return true; } diff --git a/libsearch/plugininterface/data-queue.h b/libsearch/plugininterface/data-queue.h new file mode 100644 index 0000000..6820299 --- /dev/null +++ b/libsearch/plugininterface/data-queue.h @@ -0,0 +1,26 @@ +#ifndef DATAQUEUE_H +#define DATAQUEUE_H +#include +#include +#include +#include "libsearch_global.h" +namespace Zeeker { +// TODO I want a unlocked queue +template +class LIBSEARCH_EXPORT DataQueue : public QList +{ +public: + inline void enqueue(const T &t) { + QMutexLocker locker(&m_mutex); + QList::append(t); + } + inline T dequeue() { + QMutexLocker locker(&m_mutex); + return QList::takeFirst(); + } +private: + QMutex m_mutex; +}; +} + +#endif // DATAQUEUE_H diff --git a/libsearch/plugininterface/plugin-interface.pri b/libsearch/plugininterface/plugin-interface.pri index 694ab73..6dd0f4b 100644 --- a/libsearch/plugininterface/plugin-interface.pri +++ b/libsearch/plugininterface/plugin-interface.pri @@ -1,6 +1,9 @@ -INCLUDEPATH += $$PWD +INCLUDEPATH += $$PWD \ HEADERS += \ $$PWD/plugin-iface.h \ - $$PWD/search-plugin-iface.h + $$PWD/search-plugin-iface.h \ + $$PWD/data-queue.h + +SOURCES += diff --git 
a/libsearch/plugininterface/search-plugin-iface.h b/libsearch/plugininterface/search-plugin-iface.h index 4b1345a..5714a8d 100644 --- a/libsearch/plugininterface/search-plugin-iface.h +++ b/libsearch/plugininterface/search-plugin-iface.h @@ -4,8 +4,11 @@ #include #include -#include +#include +#include +#include #include "plugin-iface.h" +#include "data-queue.h" namespace Zeeker { class SearchPluginIface : public PluginInterface @@ -16,6 +19,11 @@ public: QString key; QString value; }; + struct Actioninfo + { + int actionkey; + QString displayName; + }; /** * @brief The ResultInfo struct */ @@ -24,14 +32,17 @@ public: QIcon icon; QString name; QVector description; - QStringList actionList; //all actions, take fist for double click action. - QString key; + QString actionKey; + int type; }; + virtual ~SearchPluginIface() {} virtual QString getPluginName() = 0; - virtual void KeywordSearch(QString keyword,QQueue *searchResult) = 0; - virtual void openAction(QString action, QString key) = 0; - + virtual void KeywordSearch(QString keyword,DataQueue *searchResult) = 0; + virtual QList getActioninfo(int type) = 0; + virtual void openAction(int actionkey, QString key, int type) = 0; + virtual bool isPreviewEnable(QString key, int type) = 0; + virtual QWidget *previewPage(QString key, int type, QWidget *parent = nullptr) = 0; }; } diff --git a/libsearch/pluginmanage/search-plugin-manager.cpp b/libsearch/pluginmanage/search-plugin-manager.cpp index 8adbfdb..0eaf131 100644 --- a/libsearch/pluginmanage/search-plugin-manager.cpp +++ b/libsearch/pluginmanage/search-plugin-manager.cpp @@ -1,8 +1,21 @@ +#include #include "search-plugin-manager.h" +#include "file-search-plugin.h" +#include "app-search-plugin.h"- +#include "settings-search-plugin.h" using namespace Zeeker; static SearchPluginManager *global_instance = nullptr; +SearchPluginManager::SearchPluginManager(QObject *parent) +{ + registerPlugin(new FileSearchPlugin(this)); + registerPlugin(new DirSearchPlugin(this)); + 
registerPlugin(new FileContengSearchPlugin(this)); + registerPlugin(new AppSearchPlugin(this)); + registerPlugin(new SettingsSearchPlugin(this)); +} + bool SearchPluginManager::registerPlugin(Zeeker::SearchPluginIface *plugin) { if (m_hash.value(plugin->name())) { @@ -35,10 +48,6 @@ void SearchPluginManager::close() this->deleteLater(); } -SearchPluginManager::SearchPluginManager(QObject *parent) -{ -} - SearchPluginManager::~SearchPluginManager() { m_hash.clear(); diff --git a/libsearch/pluginmanage/search-plugin-manager.h b/libsearch/pluginmanage/search-plugin-manager.h index 6c93d47..5722ce2 100644 --- a/libsearch/pluginmanage/search-plugin-manager.h +++ b/libsearch/pluginmanage/search-plugin-manager.h @@ -2,7 +2,7 @@ #define SEARCHPLUGINFACTORY_H #include -#include "plugininterface/search-plugin-iface.h" +#include "search-plugin-iface.h" namespace Zeeker { class SearchPluginManager : public QObject diff --git a/libsearch/settingsearch/setting-match.cpp b/libsearch/settingsearch/setting-match.cpp index 89b3c9e..69b86fd 100644 --- a/libsearch/settingsearch/setting-match.cpp +++ b/libsearch/settingsearch/setting-match.cpp @@ -59,7 +59,7 @@ void SettingsMatch::xmlElement() { while(!node.isNull()) { QDomElement element = node.toElement(); - QString key = element.attribute("name");; + QString key = element.attribute("name"); m_chine_searchResult = m_chine_searchList.value(key); m_English_searchResult = m_English_searchList.value(key); QDomNodeList list = element.childNodes(); diff --git a/libsearch/settingsearch/settings-search-plugin.cpp b/libsearch/settingsearch/settings-search-plugin.cpp new file mode 100644 index 0000000..da168a5 --- /dev/null +++ b/libsearch/settingsearch/settings-search-plugin.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include "settings-search-plugin.h" +#include "file-utils.h" +using namespace Zeeker; + +SettingsSearchPlugin::SettingsSearchPlugin(QObject *parent) : QObject(parent) +{ + SearchPluginIface::Actioninfo open { 
0, tr("Open")}; + m_actionInfo << open; + m_pool.setMaxThreadCount(1); + m_pool.setExpiryTimeout(1000); + xmlElement(); +} + +const QString SettingsSearchPlugin::name() +{ + return tr("Settings Search"); +} + +const QString SettingsSearchPlugin::description() +{ + return tr("Settings search."); +} + +QString SettingsSearchPlugin::getPluginName() +{ + return tr("Settings Search"); +} + +void Zeeker::SettingsSearchPlugin::KeywordSearch(QString keyword, DataQueue *searchResult) +{ + QStringList pinyinlist; + ResultInfo resultInfo; + resultInfo.type = 0; + QLocale ql; + if (ql.language() == QLocale::Chinese) { + for (auto i = m_chineseSearchList.constBegin(); i != m_chineseSearchList.constEnd(); ++i) { + QStringList regmatch = *i; + QString key = i.key(); + for (int t = 0; t < regmatch.size(); t++) { + if (keyword == "/") + continue; + QString str = regmatch.at(t); + if (str.contains(keyword)) { + resultInfo.name = str;//中文名 + str = key + "/" + str; + resultInfo.icon = FileUtils::getSettingIcon(str, true); + resultInfo.actionKey = str; + searchResult->append(resultInfo); + continue; + } + + pinyinlist = FileUtils::findMultiToneWords(str); + for (int i = 0; i < pinyinlist.size() / 2; i++) { + str = regmatch.at(t); + QString shouzimu = pinyinlist.at(2 * i + 1); // 中文转首字母 + if (shouzimu.contains(keyword, Qt::CaseInsensitive)) { + resultInfo.name = str; + str = key + "/" + str; + resultInfo.icon = FileUtils::getSettingIcon(str, true); + resultInfo.actionKey = str; + searchResult->append(resultInfo); + break; + } + if (keyword.size() < 2) + break; + QString pinyin = pinyinlist.at(2 * i); // 中文转拼音 + if (pinyin.contains(keyword, Qt::CaseInsensitive)) { + resultInfo.name = str; + str = key + "/" + str; + resultInfo.icon = FileUtils::getSettingIcon(str, true); + resultInfo.actionKey = str; + searchResult->append(resultInfo); + break; + } + } + } + } + } + if (ql.language() == QLocale::English) { + for (auto i = m_englishSearchList.constBegin(); i != 
m_englishSearchList.constEnd(); ++i) { + QStringList regmatch = *i; + QString key = i.key(); + for (int t = 0; t < regmatch.size(); t++) { + if (keyword == "/") + continue; + QString str = regmatch.at(t); + if (str.contains(keyword, Qt::CaseInsensitive)) { + resultInfo.name = str; + str = key + "/" + str; + resultInfo.icon = FileUtils::getSettingIcon(str, true); + resultInfo.actionKey = str; + searchResult->append(resultInfo); + } + } + } + } +} + +QList SettingsSearchPlugin::getActioninfo(int type) +{ + return m_actionInfo; +} + +void SettingsSearchPlugin::openAction(int actionkey, QString key, int type) +{ + //TODO add some return message here. + QProcess process; + switch (actionkey) { + case 0: + //打开控制面板对应页面 + if (key.left(key.indexOf("/")).toLower() == "wallpaper") + process.startDetached(QString("ukui-control-center --background")); + else + process.startDetached(QString("ukui-control-center --%1").arg(key.left(key.indexOf("/")).toLower())); + break; + + default: + break; + } +} + +bool SettingsSearchPlugin::isPreviewEnable(QString key, int type) +{ + return false; +} + +QWidget *SettingsSearchPlugin::previewPage(QString key, int type, QWidget *parent = nullptr) +{ + return nullptr; +} + +/** + * @brief SettingsSearchPlugin::xmlElement + * 将xml文件内容读到内存 + */ +void SettingsSearchPlugin::xmlElement() { + + QString environment = QProcessEnvironment::systemEnvironment().value("XDG_SESSION_TYPE"); + QString version; + QFile file(QString::fromLocal8Bit("/usr/share/ukui-control-center/shell/res/search.xml")); + if(!file.open(QIODevice::ReadOnly)) { + return; + } + QDomDocument doc; + doc.setContent(&file); + QDomElement root = doc.documentElement(); + QDomNode node = root.previousSibling(); + node = root.firstChild(); + + QString chineseIndex; + QString englishIndex; + QStringList chineSearchResult; + QStringList englishSearchResult; + while (!node.isNull()) { + QDomElement element = node.toElement(); + QString key = element.attribute("name"); + chineSearchResult = 
m_chineseSearchList.value(key); + englishSearchResult = m_englishSearchList.value(key); + QDomNodeList list = element.childNodes(); + for (int i = 0; i < list.count(); ++i) { + QDomNode n = list.at(i); + + if (n.nodeName() == QString::fromLocal8Bit("Environment")) { + version=n.toElement().text(); + if ((version == "v101" && environment == "wayland") + || (version == "hw990" && environment == "x11")) { + break; + } + continue; + } + + if (n.nodeName() == QString::fromLocal8Bit("ChinesePlugin") + or n.nodeName() == QString::fromLocal8Bit("ChineseFunc")) { + chineseIndex = n.toElement().text(); + if (chineseIndex.isEmpty()) { + continue; + } + if (chineSearchResult.contains(chineseIndex)) { + continue; + } else { + chineSearchResult.append(chineseIndex); + } + } + + if (n.nodeName() == QString::fromLocal8Bit("EnglishFunc")) { + englishIndex = QString::fromLocal8Bit("/") + n.toElement().text(); + if (englishIndex.isEmpty()) { + continue; + } + englishSearchResult.append(englishIndex); + } + } + + m_chineseSearchList.insert(key, chineSearchResult); + m_englishSearchList.insert(key, englishSearchResult); + node = node.nextSibling(); + } + file.close(); +} diff --git a/libsearch/settingsearch/settings-search-plugin.h b/libsearch/settingsearch/settings-search-plugin.h new file mode 100644 index 0000000..948ae81 --- /dev/null +++ b/libsearch/settingsearch/settings-search-plugin.h @@ -0,0 +1,39 @@ +#ifndef SETTINGSSEARCHPLUGIN_H +#define SETTINGSSEARCHPLUGIN_H + +#include +#include +#include "search-plugin-iface.h" + +namespace Zeeker { +class LIBSEARCH_EXPORT SettingsSearchPlugin : public QObject, public SearchPluginIface +{ + Q_OBJECT +public: + SettingsSearchPlugin(QObject *parent = nullptr); + PluginType pluginType() {return PluginType::SearchPlugin;} + const QString name(); + const QString description(); + const QIcon icon() {return QIcon::fromTheme("folder");} + void setEnable(bool enable) {m_enable = enable;} + bool isEnable() {return m_enable;} + QString 
getPluginName(); + + void KeywordSearch(QString keyword,DataQueue *searchResult); + QList getActioninfo(int type); + void openAction(int actionkey, QString key, int type); + bool isPreviewEnable(QString key, int type); + QWidget *previewPage(QString key, int type, QWidget *parent); + +private: + void xmlElement(); + + QMap m_chineseSearchList; + QMap m_englishSearchList; + + bool m_enable = true; + QList m_actionInfo; + QThreadPool m_pool; +}; +} +#endif // SETTINGSSEARCHPLUGIN_H diff --git a/libsearch/settingsearch/settingsearch.pri b/libsearch/settingsearch/settingsearch.pri index b57feb7..2ca2624 100644 --- a/libsearch/settingsearch/settingsearch.pri +++ b/libsearch/settingsearch/settingsearch.pri @@ -2,6 +2,8 @@ INCLUDEPATH += $$PWD HEADERS += \ $$PWD/setting-match.h \ + $$PWD/settings-search-plugin.h SOURCES += \ $$PWD/setting-match.cpp \ + $$PWD/settings-search-plugin.cpp diff --git a/src/content-widget.cpp b/src/content-widget.cpp index e289b41..0fdcee8 100644 --- a/src/content-widget.cpp +++ b/src/content-widget.cpp @@ -32,9 +32,15 @@ ContentWidget::ContentWidget(QWidget * parent): QStackedWidget(parent) { // m_quicklyOpenList<<"/usr/share/applications/peony.desktop"<<"/usr/share/applications/ukui-control-center.desktop"<<"/usr/share/applications/ksc-defender.desktop"; m_quicklyOpenList << "/usr/share/applications/ksc-defender.desktop" << "/usr/share/applications/ukui-notebook.desktop" - << "/usr/share/applications/eom.desktop" + << "/usr/share/applications/kylin-photo-viewer.desktop" << "/usr/share/applications/pluma.desktop" << "/usr/share/applications/claws-mail.desktop" ; + if (QString::compare(FileUtils::getAppName(m_quicklyOpenList.at(2)), "Unknown App") == 0) { + m_quicklyOpenList.replace(2, "/usr/share/applications/eom.desktop"); + } + if (QString::compare(FileUtils::getAppName(m_quicklyOpenList.at(4)), "Unknown App") == 0) { + m_quicklyOpenList.replace(4, "/usr/share/applications/org.gnome.Evolution.desktop"); + } } ContentWidget::~ContentWidget() 
{ @@ -52,9 +58,12 @@ ContentWidget::~ContentWidget() { * @brief initUI 初始化homepage和resultpage */ void ContentWidget::initUI() { + this->setFixedHeight(486); QPalette pal = palette(); - pal.setColor(QPalette::Base, QColor(0, 0, 0, 0)); + QPalette scroll_bar_pal = palette(); +// pal.setColor(QPalette::Base, QColor(0, 0, 0, 0)); pal.setColor(QPalette::Window, QColor(0, 0, 0, 0)); //使用此palette的窗口背景将为透明 + scroll_bar_pal.setColor(QPalette::Base, QColor(0, 0, 0, 0)); m_homePage = new QWidget(this); m_homePageLyt = new QVBoxLayout(m_homePage); m_homePageLyt->setSpacing(0); @@ -71,7 +80,7 @@ void ContentWidget::initUI() { m_resultDetailArea = new QScrollArea(m_resultPage); m_resultDetailArea->setHorizontalScrollBarPolicy(Qt::ScrollBarAlwaysOff); m_resultDetailArea->setVerticalScrollBarPolicy(Qt::ScrollBarAsNeeded); - m_resultListArea->setFixedWidth(244); + m_resultListArea->setFixedWidth(280); m_resultPageLyt->addWidget(m_resultListArea); m_resultPageLyt->addWidget(m_resultDetailArea); m_resultPage->setLayout(m_resultPageLyt); @@ -80,9 +89,10 @@ void ContentWidget::initUI() { m_resultDetail = new QWidget(m_resultDetailArea); m_listLyt = new QVBoxLayout(m_resultList); m_detailLyt = new QVBoxLayout(m_resultDetail); - m_resultList->setFixedWidth(236); + //需要给滚动条留出16个像素点的宽度 + m_resultList->setFixedWidth(280 - 16); m_resultList->setFixedHeight(0); - m_listLyt->setContentsMargins(0, 0, 12, 0); + m_listLyt->setContentsMargins(0, 0, 0, 0); m_listLyt->setSpacing(0); m_resultListArea->setWidget(m_resultList); m_resultListArea->setWidgetResizable(true); @@ -98,6 +108,8 @@ void ContentWidget::initUI() { m_resultDetailArea->setFrameShape(QFrame::NoFrame); m_resultListArea->setPalette(pal); m_resultDetailArea->setPalette(pal); + m_resultListArea->verticalScrollBar()->setPalette(scroll_bar_pal); + m_resultDetailArea->verticalScrollBar()->setPalette(scroll_bar_pal); this->addWidget(m_homePage); this->addWidget(m_resultPage); @@ -379,7 +391,7 @@ void ContentWidget::initHomePage() { 
itemWidget->setLayout(layout); for(int j = 0; j < lists.at(i).count(); j++) { HomePageItem * item = new HomePageItem(itemWidget, i, lists.at(i).at(j)); - item->setFixedSize(300, 48); + item->setFixedSize(312, 48); layout->addWidget(item, j / 2, j % 2); } if(lists.at(i).length() == 1) { @@ -413,6 +425,7 @@ void ContentWidget::initHomePage() { } itemWidgetLyt->setSpacing(6); titleLabel->setFixedHeight(24); + titleLabel->setContentsMargins(6,0,0,0); itemWidgetLyt->addWidget(titleLabel); itemWidgetLyt->addWidget(itemWidget); m_homePageLyt->addWidget(listWidget); @@ -717,7 +730,7 @@ void ContentWidget::onListViewRowChanged(SearchListView * listview, const int &t if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) { m_detailView->isContent = true; m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword); - } else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && listview->currentIndex().row() == listview->getLength() - 1) { + } else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && SearchItem::SearchType::Contents == m_bestList.at(listview->currentIndex().row()).first) { m_detailView->setContent(m_bestContent, m_keyword); m_detailView->isContent = true; m_detailView->setupWidget(SearchItem::SearchType::Contents, path); diff --git a/src/content-widget.h b/src/content-widget.h index d57e956..26065e8 100644 --- a/src/content-widget.h +++ b/src/content-widget.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "control/search-detail-view.h" #include "home-page-item.h" #include "show-more-label.h" diff --git a/src/control/highlight-item-delegate.cpp b/src/control/highlight-item-delegate.cpp index c57e1e3..0cbc39a 100644 --- a/src/control/highlight-item-delegate.cpp +++ b/src/control/highlight-item-delegate.cpp @@ -53,7 +53,7 @@ void HighlightItemDelegate::paint(QPainter * painter, const QStyleOptionViewItem ctx.palette.setColor(QPalette::Text, 
optionV4.palette.color(QPalette::Active, QPalette::HighlightedText)); QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &optionV4); - textRect.adjust(0, -5, 0, 0); +// textRect.adjust(0, 0, 0, 0); painter->save(); painter->translate(textRect.topLeft()); painter->setClipRect(textRect.translated(-textRect.topLeft())); @@ -174,3 +174,10 @@ void HighlightItemDelegate::setSearchKeyword(const QString ®FindKeyWords) { m_regFindKeyWords.clear(); m_regFindKeyWords = regFindKeyWords; } + +QSize HighlightItemDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const +{ + QSize size = QStyledItemDelegate::sizeHint(option,index); + size.setHeight(size.height() + 10); + return size; +} diff --git a/src/control/highlight-item-delegate.h b/src/control/highlight-item-delegate.h index cc71b72..01caa2e 100644 --- a/src/control/highlight-item-delegate.h +++ b/src/control/highlight-item-delegate.h @@ -30,6 +30,8 @@ class HighlightItemDelegate : public QStyledItemDelegate { public: explicit HighlightItemDelegate(QObject *parent = nullptr); void setSearchKeyword(const QString &); +protected: + QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const; private: QString m_regFindKeyWords = 0; void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override; diff --git a/src/control/search-detail-view.cpp b/src/control/search-detail-view.cpp index 291547f..8865e81 100644 --- a/src/control/search-detail-view.cpp +++ b/src/control/search-detail-view.cpp @@ -313,25 +313,26 @@ void SearchDetailView::setupWidget(const int& type, const QString& path) { m_pathLabel_1->show(); m_pathLabel_2->show(); // m_pathLabel_2->setText(path); - QString showPath = path; - QFontMetrics fontMetrics = m_pathLabel_2->fontMetrics(); - if(fontMetrics.width(path) > m_pathLabel_2->width() - 10) { - //路径长度超过230,手动添加换行符以实现折叠 - int lastIndex = 0; - for(int i = lastIndex; i < path.length(); i++) { - 
if(fontMetrics.width(path.mid(lastIndex, i - lastIndex)) == m_pathLabel_2->width() - 10) { - lastIndex = i; - showPath.insert(i, '\n'); - } else if(fontMetrics.width(path.mid(lastIndex, i - lastIndex)) > m_pathLabel_2->width() - 10) { - lastIndex = i; - showPath.insert(i - 1, '\n'); - } else { - continue; - } - } - } - m_pathLabel_2->setText(showPath); - +// QString showPath = path; +// QFontMetrics fontMetrics = m_pathLabel_2->fontMetrics(); +// if(fontMetrics.width(path) > m_pathLabel_2->width() - 10) { +// //路径长度超过230,手动添加换行符以实现折叠 +// int lastIndex = 0; +// for(int i = lastIndex; i < path.length(); i++) { +// if(fontMetrics.width(path.mid(lastIndex, i - lastIndex)) == m_pathLabel_2->width() - 10) { +// lastIndex = i; +// showPath.insert(i, '\n'); +// } else if(fontMetrics.width(path.mid(lastIndex, i - lastIndex)) > m_pathLabel_2->width() - 10) { +// lastIndex = i; +// showPath.insert(i - 1, '\n'); +// } else { +// continue; +// } +// } +// } +// m_pathLabel_2->setText(showPath); + m_pathLabel_2->setText(m_pathLabel_2->fontMetrics().elidedText(path, Qt::ElideRight, m_pathLabel_2->width())); + m_pathLabel_2->setToolTip(path); m_timeLabel_1->show(); m_timeLabel_2->show(); QFileInfo fileInfo(path); @@ -465,7 +466,7 @@ void SearchDetailView::initUI() { m_layout = new QVBoxLayout(this); this->setLayout(m_layout); m_layout->setContentsMargins(16, 60, 16, 24); - this->setFixedWidth(378); + this->setFixedWidth(368); //没有网络的时候的提示信息 m_noNetFrame = new QFrame(this); @@ -487,7 +488,7 @@ void SearchDetailView::initUI() { //图标和名称、分割线区域 m_iconLabel = new QLabel(this); m_iconLabel->setAlignment(Qt::AlignCenter); - m_iconLabel->setFixedHeight(120); + m_iconLabel->setFixedHeight(128); m_nameFrame = new QFrame(this); m_nameLayout = new QHBoxLayout(m_nameFrame); m_nameLabel = new QLabel(m_nameFrame); @@ -502,7 +503,6 @@ void SearchDetailView::initUI() { m_hLine = new QFrame(this); m_hLine->setLineWidth(0); m_hLine->setFixedHeight(1); - m_hLine->setStyleSheet("QFrame{background: 
rgba(0,0,0,0.2);}"); m_layout->addWidget(m_iconLabel); m_layout->addWidget(m_nameFrame); m_layout->addWidget(m_hLine); @@ -527,12 +527,14 @@ void SearchDetailView::initUI() { m_pathLabel_2 = new QLabel(m_pathFrame); m_pathLabel_1->setText(tr("Path")); m_pathLabel_2->setFixedWidth(240); + m_pathLabel_2->setAlignment(Qt::AlignRight); // m_pathLabel_2->setWordWrap(true); m_pathLyt->addWidget(m_pathLabel_1); m_pathLyt->addStretch(); m_pathLyt->addWidget(m_pathLabel_2); m_timeLabel_1 = new QLabel(m_timeFrame); m_timeLabel_2 = new QLabel(m_timeFrame); + m_timeLabel_2->setAlignment(Qt::AlignRight); m_timeLabel_1->setText(tr("Last time modified")); m_timeLyt->addWidget(m_timeLabel_1); m_timeLyt->addStretch(); @@ -543,7 +545,6 @@ void SearchDetailView::initUI() { m_hLine_2 = new QFrame(this); m_hLine_2->setLineWidth(0); m_hLine_2->setFixedHeight(1); - m_hLine_2->setStyleSheet("QFrame{background: rgba(0,0,0,0.2);}"); m_layout->addWidget(m_detailFrame); m_layout->addWidget(m_hLine_2); @@ -557,6 +558,8 @@ void SearchDetailView::initUI() { m_layout->addStretch(); this->clearLayout(); //初始化时隐藏所有控件 + resetLineColor(); + connect(qApp, &QApplication::paletteChanged, this, &SearchDetailView::resetLineColor); } /** @@ -566,6 +569,17 @@ void SearchDetailView::refreshIcon() { this->setIcon(m_iconPath); } +void SearchDetailView::resetLineColor() +{ + if (GlobalSettings::getInstance()->getValue(STYLE_NAME_KEY).toString() != "ukui-dark") { + m_hLine->setStyleSheet("QFrame{background: rgba(0,0,0,0.06);}"); + m_hLine_2->setStyleSheet("QFrame{background: rgba(0,0,0,0.06);}"); + } else { + m_hLine->setStyleSheet("QFrame{background: rgba(255,255,255,0.08);}"); + m_hLine_2->setStyleSheet("QFrame{background: rgba(255,255,255,0.08);}"); + } +} + /** * @brief SearchDetailView::setIcon 设置图标区域 * @param path 图标路径或图标名 @@ -585,13 +599,13 @@ void SearchDetailView::setIcon(const QString &path, const bool &installed) icon = QIcon::fromTheme(path); } } - 
m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(96, 96)))); + m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(128, 128)))); } else if (m_type == SearchListView::ResType::Setting) { QIcon icon = FileUtils::getSettingIcon(path, true); - m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(96, 96)))); + m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(128, 128)))); } else { QIcon icon = FileUtils::getFileIcon(QUrl::fromLocalFile(path).toString()); - m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(96, 96)))); + m_iconLabel->setPixmap(icon.pixmap(icon.actualSize(QSize(128, 128)))); } } diff --git a/src/control/search-detail-view.h b/src/control/search-detail-view.h index 0d5770a..320f042 100644 --- a/src/control/search-detail-view.h +++ b/src/control/search-detail-view.h @@ -109,6 +109,7 @@ Q_SIGNALS: private Q_SLOTS: void execActions(const int&, const int&, const QString&); void refreshIcon(); + void resetLineColor(); }; //此类用于url拦截 diff --git a/src/create-index-ask-dialog.h b/src/create-index-ask-dialog.h index 4719181..b1eb974 100644 --- a/src/create-index-ask-dialog.h +++ b/src/create-index-ask-dialog.h @@ -32,6 +32,7 @@ #include #include #include +#include namespace Zeeker { class CreateIndexAskDialog : public QDialog { diff --git a/src/input-box.cpp b/src/input-box.cpp index 416c69a..1d3fcba 100644 --- a/src/input-box.cpp +++ b/src/input-box.cpp @@ -89,7 +89,7 @@ SearchBarHLayout::~SearchBarHLayout() { * @brief 初始化ui */ void SearchBarHLayout::initUI() { - m_queryLineEdit = new SearchLineEdit(); + m_queryLineEdit = new SearchLineEdit(this->parentWidget()); m_queryLineEdit->installEventFilter(this); m_queryLineEdit->setTextMargins(30, 1, 0, 1); this->setContentsMargins(0, 0, 0, 0); @@ -106,6 +106,17 @@ void SearchBarHLayout::initUI() { m_queryWidget->setLayout(queryWidLayout); + if (!QIcon::fromTheme("system-search-symbolic").isNull()) { + QPixmap pixmap(QIcon::fromTheme("system-search-symbolic").pixmap(QSize(20, 20))); + 
m_queryIcon = new QLabel; + m_queryIcon->setFixedSize(pixmap.size()); + m_queryIcon->setPixmap(pixmap); + } else { + QPixmap pixmap(QIcon(":/res/icons/system-search.symbolic.png").pixmap(QSize(20, 20))); + m_queryIcon = new QLabel; + m_queryIcon->setFixedSize(pixmap.size()); + m_queryIcon->setPixmap(pixmap); + } QPixmap pixmap(QIcon::fromTheme("system-search-symbolic").pixmap(QSize(20, 20))); m_queryIcon = new QLabel; m_queryIcon->setFixedSize(pixmap.size()); @@ -139,7 +150,8 @@ void SearchBarHLayout::effectiveSearchRecord() { } void SearchBarHLayout::focusIn() { - m_queryLineEdit->setFocus(); + if (!m_queryLineEdit->hasFocus()) + m_queryLineEdit->setFocus(Qt::MouseFocusReason); } void SearchBarHLayout::focusOut() { @@ -192,7 +204,7 @@ bool SearchBarHLayout::eventFilter(QObject *watched, QEvent *event) { /** * @brief UKuiSearchLineEdit 全局搜索的输入框 */ -SearchLineEdit::SearchLineEdit() { +SearchLineEdit::SearchLineEdit(QWidget *parent) : QLineEdit(parent) { this->setFocusPolicy(Qt::ClickFocus); this->installEventFilter(this); // this->setContextMenuPolicy(Qt::NoContextMenu); diff --git a/src/input-box.h b/src/input-box.h index a8e108f..d17db8c 100644 --- a/src/input-box.h +++ b/src/input-box.h @@ -100,7 +100,7 @@ class SearchLineEdit : public QLineEdit {    */ Q_CLASSINFO("D-Bus Interface", "org.ukui.search.inputbox") public: - SearchLineEdit(); + SearchLineEdit(QWidget *parent = nullptr); void record(); ~SearchLineEdit(); diff --git a/src/main.cpp b/src/main.cpp index 54dbe8b..37674a2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -314,7 +314,7 @@ int main(int argc, char *argv[]) { // TODO // Set threads which in global thread pool expiry time in 5ms, some prolems here - QThreadPool::globalInstance()->setExpiryTimeout(5); +// QThreadPool::globalInstance()->setExpiryTimeout(5); // TODO // First insdex start, the parameter us useless, should remove the parameter diff --git a/src/mainwindow.cpp b/src/mainwindow.cpp index 305366a..cf80e43 100644 --- a/src/mainwindow.cpp 
+++ b/src/mainwindow.cpp @@ -110,7 +110,10 @@ MainWindow::MainWindow(QWidget *parent) : }); m_sys_tray_icon = new QSystemTrayIcon(this); - m_sys_tray_icon->setIcon(QIcon::fromTheme("system-search-symbolic")); + if (!QIcon::fromTheme("system-search-symbolic").isNull()) + m_sys_tray_icon->setIcon(QIcon::fromTheme("system-search-symbolic")); + else + m_sys_tray_icon->setIcon(QIcon(":/res/icons/system-search.symbolic.png")); m_sys_tray_icon->setToolTip(tr("Global Search")); m_sys_tray_icon->show(); connect(m_sys_tray_icon, &QSystemTrayIcon::activated, this, [ = ](QSystemTrayIcon::ActivationReason reason) { @@ -126,6 +129,8 @@ MainWindow::MainWindow(QWidget *parent) : this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入 this->raise(); this->activateWindow(); + } else if(this->isVisible()&&!this->isActiveWindow()) { + this->activateWindow(); } else { tryHideMainwindow(); } @@ -171,7 +176,7 @@ MainWindow::~MainWindow() { * 设置本窗口的大小 this->setFixedSize(640, 640); */ void MainWindow::initUi() { - this->setFixedSize(640, 590); + this->setFixedSize(680, 590); m_frame = new QFrame(this); @@ -271,16 +276,7 @@ void MainWindow::initUi() { } else { m_contentFrame->setCurrentIndex(1); QTimer::singleShot(10, this, [ = ]() { - m_search_result_file->clear(); - m_search_result_dir->clear(); - m_search_result_content->clear(); - if(! 
m_search_result_thread->isRunning()) { - m_search_result_thread->start(); - } startSearch(text); - //允许弹窗且当前次搜索(为关闭主界面,算一次搜索过程)未询问且当前为暴力搜索 - if(GlobalSettings::getInstance()->getValue(ENABLE_CREATE_INDEX_ASK_DIALOG).toString() != "false" && !m_currentSearchAsked && FileUtils::searchMethod == FileUtils::SearchMethod::DIRECTSEARCH) - m_askTimer->start(); }); } m_researchTimer->stop(); //如果搜索内容发生改变,则停止建索引后重新搜索的倒计时 @@ -383,8 +379,21 @@ void MainWindow::primaryScreenChangedSlot(QScreen *screen) { * @param keyword */ void MainWindow::startSearch(QString keyword) { + m_search_result_file->clear(); + m_search_result_dir->clear(); + m_search_result_content->clear(); + if(! m_search_result_thread->isRunning()) { + m_search_result_thread->start(); + } + //允许弹窗且当前次搜索(为关闭主界面,算一次搜索过程)未询问且当前为暴力搜索 + if(GlobalSettings::getInstance()->getValue(ENABLE_CREATE_INDEX_ASK_DIALOG).toString() != "false" && !m_currentSearchAsked && FileUtils::searchMethod == FileUtils::SearchMethod::DIRECTSEARCH) + m_askTimer->start(); + m_contentFrame->setKeyword(keyword); + //文件、文件夹、内容搜索 + this->m_searcher->onKeywordSearch(keyword, m_search_result_file, m_search_result_dir, m_search_result_content); + //设置搜索 QStringList settingList; settingList = m_settingsMatch->startMatchApp(keyword); @@ -394,9 +403,6 @@ void MainWindow::startSearch(QString keyword) { //应用搜索 // m_seach_app_thread->stop(); m_seach_app_thread->startSearch(keyword); - - //文件、文件夹、内容搜索 - this->m_searcher->onKeywordSearch(keyword, m_search_result_file, m_search_result_dir, m_search_result_content); } /** diff --git a/src/res/icons/system-search.symbolic.png b/src/res/icons/system-search.symbolic.png new file mode 100644 index 0000000..170e2a2 Binary files /dev/null and b/src/res/icons/system-search.symbolic.png differ diff --git a/src/resource.qrc b/src/resource.qrc index 018ebdd..8f84cfb 100644 --- a/src/resource.qrc +++ b/src/resource.qrc @@ -5,5 +5,6 @@ res/icons/close.svg res/qt-translations/qt_zh_CN.qm res/icons/net-disconnected.svg + 
res/icons/system-search.symbolic.png diff --git a/src/search-app-thread.cpp b/src/search-app-thread.cpp index aadea1e..c5d482b 100644 --- a/src/search-app-thread.cpp +++ b/src/search-app-thread.cpp @@ -5,21 +5,32 @@ size_t uniqueSymbol = 0; QMutex m_mutex; SearchAppThread::SearchAppThread(QObject *parent) : QObject(parent) { - m_pool.setMaxThreadCount(1); + m_pool.setMaxThreadCount(2); m_pool.setExpiryTimeout(1000); } void SearchAppThread::startSearch(const QString & keyword) { + m_mutex.lock(); + ++uniqueSymbol; + m_mutex.unlock(); SearchApp *appsearch; - appsearch = new SearchApp(keyword); + appsearch = new SearchApp(keyword,uniqueSymbol,this); // appsearch->setKeyword(keyword); - connect(appsearch, &SearchApp::searchResultApp, this, &SearchAppThread::searchResultApp); +// connect(appsearch, &SearchApp::searchResultApp, this, &SearchAppThread::searchResultApp); m_pool.start(appsearch); } +void SearchAppThread::sendResult(const QVector result) +{ + Q_EMIT this->searchResultApp(result); +} -SearchApp::SearchApp(const QString& keyword, QObject * parent) : QObject(parent) { + +SearchApp::SearchApp(const QString& keyword, size_t uniqueSymbol, QObject * parent) : QObject(parent) { + this->setAutoDelete(true); + m_searchappThread = qobject_cast(parent); m_keyword = keyword; + m_uniqueSymbol = uniqueSymbol; } SearchApp::~SearchApp() { @@ -35,11 +46,6 @@ SearchApp::~SearchApp() { //} void SearchApp::run() { - m_mutex.lock(); - size_t tmp_uniqueSymbol; - uniqueSymbol++; - tmp_uniqueSymbol = uniqueSymbol; - m_mutex.unlock(); //nameList:应用名,pathList:已安装的是.desktop路径,未安装为空,iconList:已安装的是图标名,未安装的是图标路径 QStringList nameList, pathList, iconList, descList; QVector appVector; @@ -70,8 +76,9 @@ void SearchApp::run() { appVector.append(iconList); appVector.append(descList); m_mutex.lock(); - if (tmp_uniqueSymbol == uniqueSymbol) { - Q_EMIT this->searchResultApp(appVector); + if (m_uniqueSymbol == uniqueSymbol) { + QMetaObject::invokeMethod(m_searchappThread, "sendResult", Q_ARG(const 
QVector, appVector)); +// Q_EMIT this->searchResultApp(appVector); } m_mutex.unlock(); m_installed_apps.clear(); diff --git a/src/search-app-thread.h b/src/search-app-thread.h index 46cf491..9497ee5 100644 --- a/src/search-app-thread.h +++ b/src/search-app-thread.h @@ -13,27 +13,30 @@ public: SearchAppThread(QObject * parent = nullptr); ~SearchAppThread() = default; void startSearch(const QString&); + Q_INVOKABLE void sendResult(const QVector result); private: QThreadPool m_pool; Q_SIGNALS: - void searchResultApp(const QVector&); + void searchResultApp(const QVector); }; class SearchApp : public QObject, public QRunnable { Q_OBJECT public: - SearchApp(const QString& keyword, QObject * parent = nullptr); + SearchApp(const QString& keyword, size_t uniqueSymbol, QObject * parent = nullptr); ~SearchApp(); // void setKeyword(const QString&); protected: void run() override; private: + SearchAppThread *m_searchappThread = nullptr; QString m_keyword; + size_t m_uniqueSymbol; QMap m_installed_apps; QMap m_uninstalled_apps; -Q_SIGNALS: - void searchResultApp(const QVector&); +//Q_SIGNALS: +// void searchResultApp(const QVector&); }; } diff --git a/src/src.pro b/src/src.pro index e207640..0a52d82 100644 --- a/src/src.pro +++ b/src/src.pro @@ -9,7 +9,7 @@ TEMPLATE = app PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 CONFIG += c++11 link_pkgconfig no_keywords lrelease LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11 -LIBS += -lukui-log4qt +LIBS += -lukui-log4qt #-L/usr/local/lib/libjemalloc -ljemalloc # The following define makes your compiler emit warnings if you use # any Qt feature that has been marked deprecated (the exact warnings # depend on your compiler). Please consult the documentation of the @@ -20,11 +20,12 @@ DEFINES += QT_DEPRECATED_WARNINGS # In order to do so, uncomment the following line. # You can also select to disable deprecated APIs only up to a certain version of Qt. 
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 - +include(../libsearch/libukui-search-headers.pri) include(model/model.pri) include(control/control.pri) include(singleapplication/qt-single-application.pri) + SOURCES += \ content-widget.cpp \ create-index-ask-dialog.cpp \ diff --git a/translations/libukui-search/libukui-search_zh_CN.ts b/translations/libukui-search/libukui-search_zh_CN.ts new file mode 100644 index 0000000..a7c6f64 --- /dev/null +++ b/translations/libukui-search/libukui-search_zh_CN.ts @@ -0,0 +1,169 @@ + + + + + Zeeker::AppSearch + + + Application Description: + 应用描述: + + + + Zeeker::AppSearchPlugin + + + Open + 打开 + + + + Add Shortcut to Desktop + 添加到桌面快捷方式 + + + + Add Shortcut to Panel + 添加到任务栏快捷方式 + + + + Install + 安装 + + + + + + Applications Search + 应用搜索 + + + Application Description: + 应用描述: + + + + Zeeker::DirSearchPlugin + + + Open + 打开 + + + + Open path + 打开文件所在路径 + + + + Copy Path + 复制文件路径 + + + + + Dir Search + 目录搜索 + + + + Dir search. + 目录搜索。 + + + + Zeeker::FileContengSearchPlugin + + + Open + 打开 + + + + Open path + 打开文件所在路径 + + + + Copy Path + 复制文件路径 + + + + File Content Search + 文本内容搜索 + + + + File content search. + 文本内容搜索。 + + + + File content search + 文本内容搜索 + + + + Zeeker::FileSearchPlugin + + + Open + 打开 + + + + Open path + 打开文件所在路径 + + + + Copy Path + 复制文件路径 + + + + + File Search + 文件搜索 + + + + File search. + 文件搜索。 + + + + Zeeker::SearchManager + + + Path: + 路径: + + + + Modified time: + 修改时间: + + + + Zeeker::SettingsSearchPlugin + + + Open + 打开 + + + + + Settings Search + 设置 + + + + Settings search. + 设置。 + + + diff --git a/translations/ukui-search/bo.ts b/translations/ukui-search/bo.ts index 121582c..faec90e 100644 --- a/translations/ukui-search/bo.ts +++ b/translations/ukui-search/bo.ts @@ -4,98 +4,40 @@ QObject - + ukui-search is already running! 
- - Zeeker::ContentWidget - - - Recently Opened - - - - - Open Quickly - - - - - Commonly Used - - - - - Apps - - - - - Settings - - - - - Files - - - - - Dirs - - - - - File Contents - - - - - Best Matches - - - - - Web Pages - - - - - Unknown - - - Zeeker::CreateIndexAskDialog - + ukui-search - + Search - + Creating index can help you getting results quickly, whether to create or not? - + Don't remind - + No - + Yes @@ -103,260 +45,212 @@ Zeeker::FolderListItem - + Delete the folder out of blacklist + + Zeeker::HomePage + + + Open Quickly + + + + + Recently Opened + + + + + Commonly Used + + + Zeeker::MainWindow - + ukui-search - + Global Search - + Search - - Zeeker::OptionView - - - Open - - - - - Add Shortcut to Desktop - - - - - Add Shortcut to Panel - - - - - Open path - - - - - Copy path - - - - - Install - - - Zeeker::SearchBarHLayout - + Search - - Zeeker::SearchDetailView - - - Introduction: %1 - - - - - Application - - - - - Document - - - - - Preview is not avaliable - - - - - Path - - - - - Last time modified - - - Zeeker::SettingsWidget - + ukui-search-settings - - - + + + Search - + <h2>Settings</h2> - + <h3>Index State</h3> - - + + ... - + <h3>File Index Settings</h3> - + Following folders will not be searched. You can set it by adding and removing folders. - + Add ignored folders - + <h3>Search Engine Settings</h3> - + Please select search engine you preferred. - + baidu - + sougou - + 360 - + Whether to delete this directory? - + Yes - + No - + Creating ... - + Done - + Index Entry: %1 - + Directories - + select blocked folder - + Select - + Position: - + FileName: - + FileType: - + Cancel - + Choosen path is Empty! - + Choosen path is not in "home"! - + Its' parent folder has been blocked! - + Set blocked folder failed! - + OK @@ -364,36 +258,16 @@ Zeeker::ShowMoreLabel - - - + + + Show More... - + Retract - - - Loading - - - - - Loading. - - - - - Loading.. - - - - - Loading... 
- - diff --git a/translations/ukui-search/tr.ts b/translations/ukui-search/tr.ts index 2b68a36..cde87fc 100644 --- a/translations/ukui-search/tr.ts +++ b/translations/ukui-search/tr.ts @@ -99,7 +99,7 @@ QObject - + ukui-search is already running! ukui-bul zaten çalışıyor! @@ -247,90 +247,75 @@ Zeeker::ContentWidget - Recently Opened - Yeni Açılan + Yeni Açılan - Open Quickly - Hızlı Aç + Hızlı Aç - Commonly Used - Genel olarak kullanılan + Genel olarak kullanılan - Apps - Uygulamalar + Uygulamalar - Settings - Ayarlar + Ayarlar - Files - Dosyalar + Dosyalar - Dirs - Dizinler + Dizinler - File Contents - Dosya İçeriği + Dosya İçeriği - Best Matches - En İyi Eşleşen + En İyi Eşleşen - - Web Pages - - - - Unknown - Bilinmeyen + Bilinmeyen Zeeker::CreateIndexAskDialog - + ukui-search - + Search Ara - + Creating index can help you getting results quickly, whether to create or not? - + Don't remind - + No - + Yes @@ -338,25 +323,43 @@ Zeeker::FolderListItem - + Delete the folder out of blacklist Klasörü kara listeden silin + + Zeeker::HomePage + + + Open Quickly + Hızlı Aç + + + + Recently Opened + Yeni Açılan + + + + Commonly Used + Genel olarak kullanılan + + Zeeker::MainWindow - + ukui-search - + Global Search Genel Arama - + Search Ara @@ -364,40 +367,30 @@ Zeeker::OptionView - Open - + - Add Shortcut to Desktop - Masaüstüne Kısayol Ekle + Masaüstüne Kısayol Ekle - Add Shortcut to Panel - Panele Kısayol Ekle + Panele Kısayol Ekle - Open path - Yolu aç + Yolu aç - Copy path - Yolu kopyala - - - - Install - + Yolu kopyala Zeeker::SearchBarHLayout - + Search Ara @@ -405,193 +398,179 @@ Zeeker::SearchDetailView - - Introduction: %1 - - - - Application - Uygulama + Uygulama - Document - Belge + Belge - - Preview is not avaliable - - - - Path - Yol + Yol - Last time modified - Son değiştirilme zamanı + Son değiştirilme zamanı Zeeker::SettingsWidget - + ukui-search-settings - - - + + + Search Ara - + <h2>Settings</h2> <h2>Ayarlar</h2> - + <h3>Index State</h3> <h3>Dizin 
Durumu</h3> - - + + ... ... - + <h3>File Index Settings</h3> <h3>Dosya Dizini Ayarları</h3> - + Following folders will not be searched. You can set it by adding and removing folders. Aşağıdaki klasörler aranmayacaktır. Klasör ekleyip kaldırarak ayarlayabilirsiniz. - + Add ignored folders Göz ardı edilen klasörleri ekleyin - + <h3>Search Engine Settings</h3> <h3>SArama Motoru Ayarları</h3> - + Please select search engine you preferred. Lütfen tercih ettiğiniz arama motorunu seçin. - + baidu - + sougou - + 360 - + Whether to delete this directory? Bu dizini silinsin mi? - + Yes - + No - + Creating ... Oluşturuluyor... - + Done Tamam - + Index Entry: %1 Dizin Girişi: %1 - + Directories Dizinler - + select blocked folder engellenen klasörü seç - + Select Seç - + Position: Pozisyon: - + FileName: Dosya Adı: - + FileType: Dosya Türü: - + Cancel İptal - + Choosen path is Empty! - + Choosen path is not in "home"! - + Its' parent folder has been blocked! - + Set blocked folder failed! - + OK @@ -599,36 +578,32 @@ Zeeker::ShowMoreLabel - - - + + + Show More... Daha Fazla Göster... - + Retract Geri çek - Loading - Yükleniyor + Yükleniyor - Loading. - Yükleniyor. + Yükleniyor. - Loading.. - Yükleniyor.. + Yükleniyor.. - Loading... - Yükleniyor... + Yükleniyor... diff --git a/translations/ukui-search/zh_CN.ts b/translations/ukui-search/zh_CN.ts index 3b34cf8..afa7fe0 100644 --- a/translations/ukui-search/zh_CN.ts +++ b/translations/ukui-search/zh_CN.ts @@ -4,7 +4,7 @@ QObject - + ukui-search is already running! @@ -12,90 +12,79 @@ Zeeker::ContentWidget - Recently Opened - 最近 + 最近 - Open Quickly - 快速入口 + 快速入口 - Commonly Used - 常用 + 常用 - Apps - 应用 + 应用 - Settings - 配置项 + 配置项 - Files - 文件 + 文件 - Dirs - 文件夹 + 文件夹 - File Contents - 文件内容 + 文件内容 - Best Matches - 最佳匹配 + 最佳匹配 - Web Pages - 网页 + 网页 - Unknown - 未知 + 未知 Zeeker::CreateIndexAskDialog - + ukui-search 搜索 - + Search 搜索 - + Creating index can help you getting results quickly, whether to create or not? 创建索引可以快速获取搜索结果,是否创建? 
- + Don't remind 不再提醒 - + No 否(N) - + Yes 是(Y) @@ -103,25 +92,43 @@ Zeeker::FolderListItem - + Delete the folder out of blacklist 删除 + + Zeeker::HomePage + + + Open Quickly + 快速入口 + + + + Recently Opened + 最近 + + + + Commonly Used + 常用 + + Zeeker::MainWindow - + ukui-search 搜索 - + Global Search 搜索 - + Search 搜索 @@ -129,40 +136,34 @@ Zeeker::OptionView - Open - 打开 + 打开 - Add Shortcut to Desktop - 添加到桌面快捷方式 + 添加到桌面快捷方式 - Add Shortcut to Panel - 添加到任务栏快捷方式 + 添加到任务栏快捷方式 - Open path - 打开文件所在路径 + 打开文件所在路径 - Copy path - 复制文件路径 + 复制文件路径 - Install - 安装 + 安装 Zeeker::SearchBarHLayout - + Search 搜索 @@ -170,193 +171,187 @@ Zeeker::SearchDetailView - Introduction: %1 - 软件介绍: %1 + 软件介绍: %1 - Application - 应用 + 应用 - Document - 文件 + 文件 - Preview is not avaliable - 当前预览不可用 + 当前预览不可用 - Path - 路径 + 路径 - Last time modified - 上次修改时间 + 上次修改时间 Zeeker::SettingsWidget - + ukui-search-settings 搜索 - - - + + + Search 搜索 - + <h2>Settings</h2> <h2>设置</h2> - + <h3>Index State</h3> <h3>索引状态</h3> - - + + ... - + <h3>File Index Settings</h3> <h3>文件索引设置</h3> - + Following folders will not be searched. You can set it by adding and removing folders. 搜索将不再查看以下文件夹。通过增加和删除文件夹可进行文件索引设置。 - + Add ignored folders 添加文件夹至黑名单 - + <h3>Search Engine Settings</h3> <h3>搜索引擎设置</h3> - + Please select search engine you preferred. 设置互联网搜索引擎 - + baidu 百度 - + sougou 搜狗 - + 360 360 - + Whether to delete this directory? 是否要删除此目录 - + Yes 是(Y) - + No 否(N) - + Creating ... 正在索引 - + Done 索引完成 - + Index Entry: %1 索引项: %1 - + Directories 文件夹 - + select blocked folder 选择屏蔽文件夹 - + Select 选择 - + Position: 位置: - + FileName: 名称: - + FileType: 类型: - + Cancel 取消 - + Choosen path is Empty! 选择的路径不存在! - + Choosen path is not in "home"! 请选择家目录下的文件夹! - + Its' parent folder has been blocked! 父文件夹已被屏蔽! - + Set blocked folder failed! - + OK 好的 @@ -364,36 +359,32 @@ Zeeker::ShowMoreLabel - - - + + + Show More... 显示更多... - + Retract 收起 - Loading - 加载中 + 加载中 - Loading. - 加载中. + 加载中. - Loading.. - 加载中.. + 加载中.. - Loading... - 加载中... + 加载中... 
diff --git a/ukui-search.pro b/ukui-search.pro index f23af01..d2a9e6a 100644 --- a/ukui-search.pro +++ b/ukui-search.pro @@ -17,9 +17,10 @@ DEFINES += QT_DEPRECATED_WARNINGS libsearch.depends += libchinese-segmentation src.depends = libsearch +frontend.depends = libsearch CONFIG += ordered \ - qt QT += widgets + diff --git a/ukuisearch-systemdbus/sysdbusregister.cpp b/ukuisearch-systemdbus/sysdbusregister.cpp index 194e59f..9303d25 100644 --- a/ukuisearch-systemdbus/sysdbusregister.cpp +++ b/ukuisearch-systemdbus/sysdbusregister.cpp @@ -102,6 +102,36 @@ QString SysdbusRegister::setInotifyMaxUserWatchesStep3() { return QString(ba); } +int SysdbusRegister::AddInotifyMaxUserInstance(int addNum) +{ + QFile file("/proc/sys/fs/inotify/max_user_instances"); + if(!file.open(QIODevice::ReadOnly | QIODevice::Text)) + return -1; + QTextStream ts(&file); + QString s = ts.read(512); + int instances = s.toInt() + addNum; + + QByteArray ba; + FILE * fp = NULL; + char cmd[128]; + char buf[1024]; + sprintf(cmd, "sysctl -w fs.inotify.max_user_instances=\"%d\"", instances); + if((fp = popen(cmd, "r")) != NULL) { + rewind(fp); + while(!feof(fp)) { + fgets(buf, sizeof(buf), fp); + ba.append(buf); + } + pclose(fp); + fp = NULL; + } else { + qWarning() << "popen open failed"; + return -1; + } + return instances; + +} + //The following example comes from control center //void SysdbusRegister::setAutoLoginStatus(QString username) { diff --git a/ukuisearch-systemdbus/sysdbusregister.h b/ukuisearch-systemdbus/sysdbusregister.h index caa9bc7..a52d75d 100644 --- a/ukuisearch-systemdbus/sysdbusregister.h +++ b/ukuisearch-systemdbus/sysdbusregister.h @@ -52,6 +52,7 @@ public slots: Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3(); + Q_SCRIPTABLE int AddInotifyMaxUserInstance(int addNum); // // 设置免密登录状态 // Q_SCRIPTABLE void setNoPwdLoginStatus();