Merge remote-tracking branch 'upstream-gitee/openkylin/yangtze' into packaging/openkylin/yangtze

2022-10-24 10:45:13 +08:00 · 2022-10-24 10:45:13 +08:00 · 1832b01721
parent 952d82fd2c 6d78f95c13
commit 1832b01721
348 changed files with 459999 additions and 391548 deletions
--- a/data/org.ukui.search.data.gschema.xml
+++ b/data/org.ukui.search.data.gschema.xml
@ -1,9 +1,9 @@
 <schemalist gettext-domain="ukui-search">
    <schema id="org.ukui.search.settings" path="/org/ukui/ukui-search/settings/">
-        <key name="index-search" type="b">
+        <key name="file-index-enable" type="b">
            <default>false</default>
-            <summary>search method</summary>
-            <description>Is current search-method index-search.</description>
+            <summary>file index switch</summary>
+            <description>Enable or disable file index service.</description>
        </key>
        <key name="web-engine" type="s">
            <default>"baidu"</default>
--- a/data/ukui-search-app-data-service.desktop
+++ b/data/ukui-search-app-data-service.desktop
@ -0,0 +1,16 @@
+[Desktop Entry]
+Name=ukui-search-app-data-service
+Name[zh_CN]=应用数据搜索服务
+GenericName=ukui-search-app-data-service
+GenericName[zh_CN]=应用数据搜索服务
+Comment=ukui-search-app-data-service
+Comment[zh_CN]=应用数据搜索服务
+Exec=/usr/bin/ukui-search-app-data-service %U
+Type=Application
+Icon=kylin-search
+X-UKUI-AutoRestart=true
+OnlyShowIn=UKUI
+NoDisplay=true
+X-UKUI-Autostart-Phase=Application
+Terminal=false
+
--- a/data/ukui-search-service-dir-manager.desktop
+++ b/data/ukui-search-service-dir-manager.desktop
@ -0,0 +1,15 @@
+[Desktop Entry]
+Name=ukui-search-service-dir-manager
+Name[zh_CN]=搜索服务目录管理
+GenericName=ukui-search-service-dir-manager
+GenericName[zh_CN]=搜索服务目录管理
+Comment=ukui-search-service-dir-manager
+Comment[zh_CN]=搜索服务目录管理
+Exec=/usr/bin/ukui-search-service-dir-manager %U
+Type=Application
+Icon=kylin-search
+X-UKUI-AutoRestart=true
+OnlyShowIn=UKUI
+NoDisplay=true
+X-UKUI-Autostart-Phase=Application
+Terminal=false
--- a/debian/source/format
+++ b/debian/source/format
@ -1 +1 @@
-3.0 (quilt)
+3.0 (native)
--- a/frontend/control/create-index-ask-dialog.cpp
+++ b/frontend/control/create-index-ask-dialog.cpp
@ -21,7 +21,7 @@

 #include "create-index-ask-dialog.h"
 #include <QPainterPath>
-#include "kwindowsystem.h"
+#include <KWindowSystem>

 #define MAIN_SIZE QSize(380, 202)
 #define MAIN_SPACING 0
--- a/frontend/control/search-line-edit.cpp
+++ b/frontend/control/search-line-edit.cpp
@ -45,7 +45,7 @@ SearchLineEdit::SearchLineEdit(QWidget *parent) : QLineEdit(parent) {
        pixmap =  QPixmap(QIcon(":/res/icons/system-search.symbolic.png").pixmap(QSize(18, 18)));
    }
    m_queryIcon->setProperty("useIconHighlightEffect", 0x10);
-    m_queryIcon->setFixedSize(pixmap.size());
+    m_queryIcon->setFixedSize(pixmap.size() / pixmap.devicePixelRatio());
    m_queryIcon->setPixmap(pixmap);

    m_ly = new QHBoxLayout(this);
@ -75,6 +75,11 @@ SearchLineEdit::SearchLineEdit(QWidget *parent) : QLineEdit(parent) {
            m_timer->start(0.1 * 1000);
        }
    });
+
+    //跟随主题透明度变化
+    connect(qApp, &QApplication::paletteChanged, this, [=]() {
+        update();
+    });
 }

 SearchLineEdit::~SearchLineEdit() {
--- a/frontend/control/settings-widget.h
+++ b/frontend/control/settings-widget.h
@ -32,7 +32,7 @@
 #include <QPushButton>
 #include <QScrollArea>
 #include <QTimer>
-#include <libsearch.h>
+#include "libsearch.h"
 #if (QT_VERSION >= QT_VERSION_CHECK(5, 12, 0))
 #include "xatom-helper.h"
 #endif
--- a/frontend/control/stack-pages/search-page-section.cpp
+++ b/frontend/control/stack-pages/search-page-section.cpp
@ -341,24 +341,25 @@ void ResultArea::mouseReleaseEvent(QMouseEvent *event)

 bool ResultArea::viewportEvent(QEvent *event)
 {
-    if(event->type() == QEvent::TouchBegin) {
-        QTouchEvent *e = dynamic_cast<QTouchEvent *>(event);
-        if(e->touchPoints().size() == 1) {
-            m_pressPoint = m_widget->mapFrom(this, e->touchPoints().at(0).pos().toPoint());
+    if (event->type() == QEvent::MouseButtonPress) {
+        QMouseEvent *e = dynamic_cast<QMouseEvent *>(event);
+        if (e->source() == Qt::MouseEventSynthesizedByApplication) {
+            qDebug() << "MouseButtonPress MouseEventSynthesizedByApplication";
+            m_pressPoint = m_widget->mapFrom(this, e->pos());
            event->accept();
            return true;
        }
-    } else if (event->type() == QEvent::TouchUpdate) {
-        QTouchEvent *e = dynamic_cast<QTouchEvent *>(event);
-//        qDebug() << "touchpoint===========" << e->touchPoints().size();
-        if(e->touchPoints().size() == 1) {
-            int delta = m_pressPoint.y() - m_widget->mapFrom(this, e->touchPoints().at(0).pos().toPoint()).y();
-//            qDebug() << "last pos:" << m_pressPoint.y();
-//            qDebug() << "new pos:" << m_widget->mapFrom(this, e->touchPoints().at(0).pos().toPoint()).y();
-//            qDebug() << "delta" << delta;
-//            qDebug() << "height" << m_widget->height() << "--" << verticalScrollBar()->maximum();
+    } else if (event->type() == QEvent::MouseMove) {
+        QMouseEvent *e = dynamic_cast<QMouseEvent *>(event);
+        if (e->source() == Qt::MouseEventSynthesizedByApplication) {
+            qDebug() << "MouseMove MouseEventSynthesizedByApplication";
+            int delta = m_pressPoint.y() - m_widget->mapFrom(this, e->pos()).y();
+            //            qDebug() << "last pos:" << m_pressPoint.y();
+            //            qDebug() << "new pos:" << m_widget->mapFrom(this, e->touchPoints().at(0).pos().toPoint()).y();
+            //            qDebug() << "delta" << delta;
+            //            qDebug() << "value" << verticalScrollBar()->value() << "--" << verticalScrollBar()->value() + delta;
            this->verticalScrollBar()->setValue(verticalScrollBar()->value() + delta);
-            m_pressPoint = m_widget->mapFrom(this,e->touchPoints().at(0).pos().toPoint());
+            m_pressPoint = m_widget->mapFrom(this,e->pos());
            return true;
        }
    }
@ -458,12 +459,13 @@ void ResultArea::initConnections()
    connect(this->m_titleLabel, &TitleLabel::retractClicked, this, [=] () {
        Q_FOREACH(auto widget, m_widget_list) {
            if (widget->pluginName() == m_titleLabel->text()) {
-                widget->reduceListSlot();
-                widget->resetTitleLabel();
                if (!m_titleLabel->isHidden()) {
                    m_titleLabel->hide();
                    this->setViewportMargins(0,0,0,0);
                }
+                widget->reduceListSlot();
+                this->verticalScrollBar()->setValue(widget->pos().ry());
+                widget->resetTitleLabel();
            }
        }
    });
@ -510,8 +512,8 @@ void ResultArea::setupConnectionsForWidget(ResultWidget *widget)
    });
    connect(widget, &ResultWidget::retractClicked, this, [=] () {//点击收起搜索结果后
        if (!m_titleLabel->isHidden()) {
-            m_titleLabel->hide();
            this->setViewportMargins(0,0,0,0);
+            m_titleLabel->hide();
        }
    });
    connect(widget, &ResultWidget::sendBestListData, m_bestListWidget, &BestListWidget::sendBestListData);
--- a/frontend/control/stack-pages/search-result-page.cpp
+++ b/frontend/control/stack-pages/search-result-page.cpp
@ -173,6 +173,10 @@ void SearchResultPage::initConnections()
        sendResizeWidthSignal(280);
    });
    connect(this, &SearchResultPage::setSelectionInfo, m_resultArea, &ResultArea::setSelectionInfo);
+    //跟随主题透明度变化
+    connect(qApp, &QApplication::paletteChanged, this, [=]() {
+        update();
+    });
 }

 void SearchResultPage::setupConnectionsForWidget(ResultWidget *widget)
--- a/frontend/frontend.pro
+++ b/frontend/frontend.pro
@ -1,13 +1,13 @@
-QT       += core gui dbus  KWindowSystem xml x11extras
+QT       += core gui dbus KWindowSystem xml x11extras sql

 greaterThan(QT_MAJOR_VERSION, 4): QT += widgets

-VERSION = 1.0.0
+VERSION = 2.2.3
 DEFINES += VERSION='\\"$${VERSION}\\"'
 TARGET = ukui-search
 TEMPLATE = app

-PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0
+PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 kysdk-waylandhelper
 CONFIG += c++11 link_pkgconfig no_keywords lrelease
 LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11
 #LIBS += -lukui-log4qt
@ -59,7 +59,7 @@ RESOURCES += \
 TRANSLATIONS += \
    ../translations/ukui-search/zh_CN.ts \
    ../translations/ukui-search/tr.ts \
-    ../translations/ukui-search/bo.ts
+    ../translations/ukui-search/bo_CN.ts

 qm_files.path = /usr/share/ukui-search/translations/
 qm_files.files = $$OUT_PWD/.qm/*.qm
--- a/frontend/mainwindow.cpp
+++ b/frontend/mainwindow.cpp
@ -31,15 +31,18 @@
 #include <QPixmap>
 #if (QT_VERSION >= QT_VERSION_CHECK(5, 12, 0))
 #include <KWindowEffects>
-#include "kwindowsystem.h"
+#include <KWindowSystem>
+
 #endif
-#include "global-settings.h"
 #include <QtX11Extras/QX11Info>
+#include "ukuistylehelper/ukuistylehelper.h"
+#include "windowmanager/windowmanager.h"
+#include "global-settings.h"

 #define MAIN_MARGINS 0, 0, 0, 0
 #define TITLE_MARGINS 0,0,0,0
 #define UKUI_SEARCH_SCHEMAS "org.ukui.search.settings"
-#define SEARCH_METHOD_KEY "indexSearch"
+#define SEARCH_METHOD_KEY "fileIndexEnable"
 #define WEB_ENGINE_KEY "webEngine"
 #define WINDOW_WIDTH 700
 #define WINDOW_HEIGHT 610
@ -79,12 +82,19 @@ MainWindow::MainWindow(QWidget *parent) :
    initConnections();
    initGsettings();

-    connect(KWindowSystem::self(), &KWindowSystem::activeWindowChanged, this,[&](WId activeWindowId){
-            if (activeWindowId != this->winId()) {
-                tryHideMainwindow();
-            }
-        });
+//    connect(KWindowSystem::self(), &KWindowSystem::activeWindowChanged, this,[&](WId activeWindowId){
+//        qDebug() << "activeWindowChanged!!!" << activeWindowId;
+//            if (activeWindowId != this->winId()) {
+//                tryHideMainwindow();
+//            }
+//        });

+    m_appWidgetPlugin = new AppWidgetPlugin;
+
+    connect(m_appWidgetPlugin, &AppWidgetPlugin::startSearch, this, [ & ] (QString keyword){
+        this->bootOptionsFilter("-s");
+        this->setText(keyword);
+    });
    //NEW_TODO, register plugins
 //    SearchPluginManager::getInstance()->registerPlugin(\\);
 //    m_stackedWidget->setPlugins(SearchPluginManager::getInstance()->getPluginIds());
@ -150,11 +160,11 @@ void MainWindow::initUi() {
    //创建索引询问弹窗
    m_askDialog = new CreateIndexAskDialog(this);
 #if (QT_VERSION >= QT_VERSION_CHECK(5, 12, 0))
-    MotifWmHints ask_dialog_hints;
-    ask_dialog_hints.flags = MWM_HINTS_FUNCTIONS | MWM_HINTS_DECORATIONS;
-    ask_dialog_hints.functions = MWM_FUNC_ALL;
-    ask_dialog_hints.decorations = MWM_DECOR_BORDER;
-    XAtomHelper::getInstance()->setWindowMotifHint(m_askDialog->winId(), ask_dialog_hints);
+//    MotifWmHints ask_dialog_hints;
+//    ask_dialog_hints.flags = MWM_HINTS_FUNCTIONS | MWM_HINTS_DECORATIONS;
+//    ask_dialog_hints.functions = MWM_FUNC_ALL;
+//    ask_dialog_hints.decorations = MWM_DECOR_BORDER;
+//    XAtomHelper::getInstance()->setWindowMotifHint(m_askDialog->winId(), ask_dialog_hints);
 #endif
 }

@ -192,7 +202,6 @@ void MainWindow::bootOptionsFilter(QString opt) {
        if (this->isHidden()) {
            clearSearchResult();
            centerToScreen(this);
-            this->show();
            this->m_searchBarWidget->setFocus();
            this->activateWindow();
        }
@ -217,7 +226,6 @@ void MainWindow::trayIconActivatedSlot(QSystemTrayIcon::ActivationReason reason)
        if(!this->isVisible()) {
            clearSearchResult();
            centerToScreen(this);
-            this->show();
 //            this->m_searchLineEdit->focusIn(); //打开主界面时输入框夺焦，可直接输入
            this->raise();
            this->activateWindow();
@ -301,10 +309,12 @@ void MainWindow::searchKeywordSlot(const QString &keyword)
    //NEW_TODO
    if(keyword == "") {
 //        m_stackedWidget->setPage(int(StackedPage::HomePage));
+        QTimer::singleShot(10, this, [ = ]() {
            m_askTimer->stop();
-        Q_EMIT m_searchResultPage->stopSearch();
+//            Q_EMIT m_searchResultPage->stopSearch();
            m_searchResultPage->hide();
            this->resizeHeight(68);
+        });

    } else {
 //        m_stackedWidget->setPage(int(StackedPage::SearchPage));
@ -327,6 +337,11 @@ void MainWindow::resizeHeight(int height)
    this->setFixedHeight(height);
 }

+/**
+ * @brief Public slot that delegates to tryHideMainwindow().
+ */
+void MainWindow::tryHide()
+{
+    tryHideMainwindow();
+}
+
 /**
 * @brief monitorResolutionChange  监听屏幕改变
 * @param rect
@ -421,7 +436,16 @@ void MainWindow::centerToScreen(QWidget* widget) {
 //        desk_x = width;
 //        desk_y = height;
 //    }
-    widget->move(desk_x / 2 - x / 2 + desk_rect.left(), desk_y / 3 + desk_rect.top());
+    widget->show();
+    kdk::WindowManager::setGeometry(this->windowHandle(),QRect(desk_x / 2 - x / 2 + desk_rect.left(),
+                                               desk_y / 3 + desk_rect.top(),
+                                               this->width(),
+                                               this->height()));
+    //设置跳过多任务视图
+    kdk::WindowManager::setSkipSwitcher(this->windowHandle(),true);
+    //设置跳过任务栏
+    kdk::WindowManager::setSkipTaskBar(this->windowHandle(),true);
+//    widget->move(desk_x / 2 - x / 2 + desk_rect.left(), desk_y / 3 + desk_rect.top());
 }

 void MainWindow::initGsettings() {
@ -460,7 +484,12 @@ void MainWindow::initTimer() {
    connect(m_askTimer, &QTimer::timeout, this, [ = ]() {
        if(this->isVisible()) {
            m_isAskDialogVisible = true;
+            kdk::UkuiStyleHelper::self()->removeHeader(m_askDialog);
            m_askDialog->show();
+            //设置跳过多任务视图
+            kdk::WindowManager::setSkipSwitcher(m_askDialog->windowHandle(),true);
+            //设置跳过任务栏
+            kdk::WindowManager::setSkipTaskBar(m_askDialog->windowHandle(),true);
            m_currentSearchAsked = true;
        }
        m_askTimer->stop();
@ -558,11 +587,21 @@ void MainWindow::paintEvent(QPaintEvent *event) {

 bool MainWindow::eventFilter(QObject *watched, QEvent *event)
 {
+    if (watched == this) {
+        //失焦退出
+        if (event->type() == QEvent::ActivationChange) {
+            if (QApplication::activeWindow() != this) {
+                tryHideMainwindow();
+                return true;
+            }
+        }
        //kwin alt+f4发出close事件, 需要在存在子窗口时屏蔽该事件
-    if ((watched == this) && (event->type() == QEvent::Close)) {
+        if (event->type() == QEvent::Close) {
            event->ignore();
            tryHideMainwindow();
            return true;
        }
+    }
+
    return QObject::eventFilter(watched, event);
 }
--- a/frontend/mainwindow.h
+++ b/frontend/mainwindow.h
@ -46,6 +46,7 @@
 #include <QSystemTrayIcon>
 #include <QTimer>

+#include "search-app-widget-plugin/search.h"
 #include "index-generator.h"
 #include "libsearch.h"
 #include "create-index-ask-dialog.h"
@ -110,6 +111,7 @@ public Q_SLOTS:
    void settingsBtnClickedSlot();
    void searchKeywordSlot(const QString&);
    void resizeHeight(int height);
+    void tryHide();

 private:

@ -142,6 +144,7 @@ private:
    QTimer * m_researchTimer = nullptr; //创建索引后重新执行一次搜索的计时器
    bool m_currentSearchAsked = false; //本次搜索是否已经询问过是否创建索引了
    QGSettings * m_search_gsettings = nullptr;
+    AppWidgetPlugin *m_appWidgetPlugin = nullptr;

    void setSearchMethod(const bool&);
    double getTransparentData();
--- a/frontend/model/search-result-manager.cpp
+++ b/frontend/model/search-result-manager.cpp
@ -67,6 +67,7 @@ ReceiveResultThread::ReceiveResultThread(DataQueue<SearchPluginIface::ResultInfo
 void ReceiveResultThread::stop()
 {
    this->requestInterruption();
+    this->wait();
    this->quit();
 }

--- a/frontend/res/qt-translations/qt_zh_CN.qm
+++ b/frontend/res/qt-translations/qt_zh_CN.qm
--- a/frontend/resource.qrc
+++ b/frontend/resource.qrc
@ -3,7 +3,6 @@
        <file>res/icons/edit-find-symbolic.svg</file>
        <file>res/icons/desktop.png</file>
        <file>res/icons/close.svg</file>
-        <file>res/qt-translations/qt_zh_CN.qm</file>
        <file>res/icons/net-disconnected.svg</file>
        <file>res/icons/system-search.symbolic.png</file>
        <file>res/icons/ukui-up-symbolic.svg</file>
--- a/frontend/ukui-search-dbus-service.cpp
+++ b/frontend/ukui-search-dbus-service.cpp
@ -12,6 +12,15 @@ void UkuiSearchDbusServices::searchKeyword(QString keyword)
    m_mainWindow->setText(keyword);
 }

+/**
+ * @brief Toggles the main window.
+ *
+ * When the main window is currently the active window it is asked to hide;
+ * otherwise the "-s" option is forwarded to MainWindow::bootOptionsFilter().
+ */
+void UkuiSearchDbusServices::mainWindowSwitch()
+{
+    if (!m_mainWindow->isActiveWindow()) {
+        m_mainWindow->bootOptionsFilter("-s");
+        return;
+    }
+    m_mainWindow->tryHide();
+}
+
 UkuiSearchDbusServices::UkuiSearchDbusServices(MainWindow *m)
 {
    m_mainWindow = m;
--- a/frontend/ukui-search-dbus-service.h
+++ b/frontend/ukui-search-dbus-service.h
@ -20,9 +20,10 @@ public:
 public Q_SLOTS:
    void showWindow();
    void searchKeyword(QString keyword);
+    void mainWindowSwitch();

 private:
-    MainWindow *m_mainWindow;
+    MainWindow *m_mainWindow = nullptr;
 };
 }

--- a/frontend/ukui-search-gui.cpp
+++ b/frontend/ukui-search-gui.cpp
@ -35,7 +35,7 @@ UkuiSearchGui::UkuiSearchGui(int &argc, char *argv[], const QString &application

        QTranslator *qt_translator = new QTranslator(this);
        try {
-            if(! qt_translator->load(":/res/qt-translations/qt_zh_CN.qm")) throw - 1;
+            if(! qt_translator->load("/usr/share/qt5/translations/qt_" + QLocale::system().name())) throw - 1;
            this->installTranslator(qt_translator);
        } catch(...) {
            qDebug() << "Load translations file" << QLocale() << "failed!";
--- a/frontend/view/best-list-view.cpp
+++ b/frontend/view/best-list-view.cpp
@ -31,13 +31,24 @@ bool BestListView::isSelected()

 int BestListView::showHeight()
 {
-    int height;
-    int rowheight = this->rowHeight(this->model()->index(0, 0, QModelIndex()));
+    int height(0);
+//    int rowheight = this->rowHeight(this->model()->index(0, 0, QModelIndex()));
+//    if (this->isExpanded()) {
+//        height = m_count * rowheight;
+//    } else {
+//        int show_count = m_count > NUM_LIMIT_SHOWN_DEFAULT ? NUM_LIMIT_SHOWN_DEFAULT : m_count;
+//        height = show_count * rowheight;
+//    }
+
    if (this->isExpanded()) {
-        height = m_count * rowheight;
+        for (int i = 0; i<m_count; ++i) {
+            height += this->rowHeight(this->model()->index(i, 0, QModelIndex()));
+        }
    } else {
        int show_count = m_count > NUM_LIMIT_SHOWN_DEFAULT ? NUM_LIMIT_SHOWN_DEFAULT : m_count;
-        height = show_count * rowheight;
+        for (int i = 0; i<show_count; ++i) {
+            height += this->rowHeight(this->model()->index(i, 0, QModelIndex()));
+        }
    }
    return height;
 }
--- a/frontend/view/result-view-delegate.cpp
+++ b/frontend/view/result-view-delegate.cpp
@ -3,15 +3,15 @@
 using namespace UkuiSearch;
 static ResultItemStyle *global_instance_of_item_style = nullptr;

-ResultViewDelegate::ResultViewDelegate(QObject *parent) : QStyledItemDelegate(parent)
+ResultViewDelegate::ResultViewDelegate(QObject *parent) : QStyledItemDelegate(parent),
+    m_textDoc(new QTextDocument(this)),
+    m_hightLightEffectHelper(new HightLightEffectHelper(this))
 {
-
 }

 void ResultViewDelegate::setSearchKeyword(const QString &regFindKeyWords)
 {
-    m_regFindKeyWords.clear();
-    m_regFindKeyWords = regFindKeyWords;
+    m_hightLightEffectHelper->setExpression(regFindKeyWords);
 }

 QSize ResultViewDelegate::sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const
@ -21,90 +21,36 @@ QSize ResultViewDelegate::sizeHint(const QStyleOptionViewItem &option, const QMo
    return size;
 }

-void ResultViewDelegate::paint(QPainter * painter, const QStyleOptionViewItem & option, const QModelIndex & index) const {
+void ResultViewDelegate::paint(QPainter *painter, const QStyleOptionViewItem &option, const QModelIndex &index) const
+{
    QStyleOptionViewItem opt = option;
    initStyleOption(&opt, index);
-    QStyle *style = opt.widget->style();
+    opt.displayAlignment = Qt::Alignment(Qt::AlignLeft|Qt::AlignVCenter);

    QString text = opt.text;
-    if(text.isEmpty()) {
-        return;
-    }
    opt.text = QString();
+
+    QStyle *style = opt.widget->style();
    style->proxy()->drawControl(QStyle::CE_ItemViewItem, &opt, painter, opt.widget); //绘制非文本区域内容

-    opt.text = text;
-    QTextDocument doc;
-    doc.setHtml(getHtmlText(painter, opt, index)); //提取富文本
-    QAbstractTextDocumentLayout* layout = doc.documentLayout();
-    const double height = layout->documentSize().height();
-
-
    QRect textRect = style->subElementRect(QStyle::SE_ItemViewItemText, &opt, opt.widget);
-    //使图标和文本间隔与原来保持一致，故文本区域右移4
-//    textRect.adjust(4, 0, 0, 0);
-    double y = textRect.y();
-    y += (textRect.height() - height) / 2;
+    QFontMetrics fontMetrics(opt.font);
+    text = fontMetrics.elidedText(text, Qt::ElideRight, textRect.width() - 5); //富余5px的宽度
+    opt.text = text;

-    QAbstractTextDocumentLayout::PaintContext context;
-
-    QPalette::ColorGroup cg = opt.state & QStyle::State_Enabled
-            ? QPalette::Normal : QPalette::Disabled;
-    if (cg == QPalette::Normal && !(opt.state & QStyle::State_Active))
-        cg = QPalette::Inactive;
-
-    if(opt.state & QStyle::State_Selected) {
-        painter->setPen(opt.palette.color(cg, QPalette::HighlightedText));
-    } else {
-        painter->setPen(opt.palette.color(cg, QPalette::Text));
-    }
    painter->save();
-    painter->translate(QPointF(textRect.x(), y));
-    layout->draw(painter, context); //绘制文本区域内容
+    if(opt.state & QStyle::State_Selected) {
+        m_hightLightEffectHelper->setTextColor(QBrush(opt.palette.highlightedText().color()));
+    } else {
+        m_hightLightEffectHelper->setTextColor(QBrush(opt.palette.text().color()));
+    }
+    painter->translate(textRect.topLeft());
+
+    m_textDoc->setPlainText(text);
+    m_hightLightEffectHelper->setDocument(m_textDoc);
+    m_hightLightEffectHelper->rehighlight();
+    m_textDoc->drawContents(painter);
    painter->restore();
-
-}
-
-QString ResultViewDelegate::getHtmlText(QPainter *painter, const QStyleOptionViewItem &itemOption, const QModelIndex &index) const
-{
-    int indexFindLeft = 0;
-    QString indexString = index.model()->data(index, Qt::DisplayRole).toString();
-    QFont ft(painter->font().family(), GlobalSettings::getInstance()->getValue(FONT_SIZE_KEY).toInt());
-    QFontMetrics fm(ft);
-    QString indexColString = fm.elidedText(indexString, Qt::ElideRight, itemOption.rect.width() - 30 - 10); //当字体超过Item的长度时显示为省略号
-    QString htmlString;
-    if((indexColString.toUpper()).contains((m_regFindKeyWords.toUpper()))) {
-        indexFindLeft = indexColString.toUpper().indexOf(m_regFindKeyWords.toUpper()); //得到查找字体在当前整个Item字体中的位置
-        htmlString = escapeHtml(indexColString.left(indexFindLeft)) + "<b>" + escapeHtml(indexColString.mid(indexFindLeft, m_regFindKeyWords.length())) + "</b>" + escapeHtml(indexColString.right(indexColString.length() - indexFindLeft - m_regFindKeyWords.length()));
-    } else {
-        bool boldOpenned = false;
-        for(int i = 0; i < indexColString.length(); i++) {
-            if((m_regFindKeyWords.toUpper()).contains(QString(indexColString.at(i)).toUpper())) {
-                if(! boldOpenned) {
-                    boldOpenned = true;
-                    htmlString.append(QString("<b>"));
-                }
-                htmlString.append(escapeHtml(QString(indexColString.at(i))));
-            } else {
-                if(boldOpenned) {
-                    boldOpenned = false;
-                    htmlString.append(QString("</b>"));
-                }
-                htmlString.append(escapeHtml(QString(indexColString.at(i))));
-
-            }
-        }
-    }
-//    qDebug()<<indexColString<<"---->"<<htmlString;
-    return "<pre>" + htmlString + "</pre>";
-}
-
-QString ResultViewDelegate::escapeHtml(const QString &str) const
-{
-    QString temp = str;
-    temp.replace("<", "&lt;");
-    temp.replace(">", "&gt;");
-    return temp;
 }

 ResultItemStyle *ResultItemStyle::getStyle()
@ -259,3 +205,32 @@ void ResultItemStyle::drawControl(QStyle::ControlElement element, const QStyleOp
        break;
    }
 }
+
+/**
+ * @brief Constructs the highlighter and configures its expression for
+ *        case-insensitive, fixed-string matching.
+ * @param parent Passed on to the QSyntaxHighlighter constructor.
+ */
+HightLightEffectHelper::HightLightEffectHelper(QObject *parent)
+    : QSyntaxHighlighter(parent)
+{
+    m_expression.setPatternSyntax(QRegExp::FixedString);
+    m_expression.setCaseSensitivity(Qt::CaseInsensitive);
+}
+
+// Stores the given text as the pattern of the expression that
+// highlightBlock() searches for.
+void HightLightEffectHelper::setExpression(const QString &text)
+{
+    m_expression.setPattern(text);
+}
+
+// Sets the foreground brush of the character format that highlightBlock()
+// applies to the text.
+void HightLightEffectHelper::setTextColor(const QBrush &brush)
+{
+    m_textCharFormat.setForeground(brush);
+}
+
+/**
+ * @brief Applies the base character format to the whole block and renders
+ *        every occurrence of the search expression in bold.
+ * @param text Content of the text block being highlighted.
+ */
+void HightLightEffectHelper::highlightBlock(const QString &text)
+{
+    setFormat(0, text.length(), m_textCharFormat);
+
+    // An empty pattern produces zero-length matches, so the search position
+    // below would never advance; there is nothing to emphasise in that case.
+    if (m_expression.isEmpty()) {
+        return;
+    }
+
+    // Use a local bold copy so the member format is never left modified.
+    QTextCharFormat boldFormat(m_textCharFormat);
+    boldFormat.setFontWeight(QFont::Bold);
+
+    int index = text.indexOf(m_expression);
+    while (index >= 0) {
+        const int length = m_expression.matchedLength();
+        if (length <= 0) {
+            // Defensive: a zero-length match cannot move the search forward.
+            break;
+        }
+        setFormat(index, length, boldFormat);
+        index = text.indexOf(m_expression, index + length);
+    }
+}
--- a/frontend/view/result-view-delegate.h
+++ b/frontend/view/result-view-delegate.h
@ -27,10 +27,29 @@
 #include <QTextDocument>
 #include <QAbstractTextDocumentLayout>
 #include <QProxyStyle>
+#include <QSyntaxHighlighter>
+#include <QTextCharFormat>
+#include <QRegExp>
 #include "global-settings.h"

 namespace UkuiSearch {
-class ResultViewDelegate : public QStyledItemDelegate {
+/**
+ * @brief Syntax highlighter used to emphasise occurrences of a search
+ *        keyword inside a text document.
+ */
+class HightLightEffectHelper : public QSyntaxHighlighter
+{
+public:
+    explicit HightLightEffectHelper(QObject *parent = nullptr);
+    /// Sets the pattern that highlightBlock() searches for.
+    void setExpression(const QString &text);
+    /// Sets the foreground brush of the format applied to the text.
+    void setTextColor(const QBrush &brush);
+
+protected:
+    /// Formats one text block; invoked by QSyntaxHighlighter.
+    void highlightBlock(const QString &text) override;
+
+private:
+    QRegExp m_expression;             // expression matched against each block
+    QTextCharFormat m_textCharFormat; // character format applied to the text
+};
+
+class ResultViewDelegate : public QStyledItemDelegate
+{
    Q_OBJECT
 public:
    explicit ResultViewDelegate(QObject *parent = nullptr);
@ -38,11 +57,12 @@ public:
    void setSearchKeyword(const QString &);
 protected:
    QSize sizeHint(const QStyleOptionViewItem &option, const QModelIndex &index) const;
-private:
-    QString m_regFindKeyWords = 0;
    void paint(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const override;
-    QString getHtmlText(QPainter *, const QStyleOptionViewItem &, const QModelIndex &) const;
-    QString escapeHtml(const QString&) const;
+
+private:
+    QTextDocument *m_textDoc = nullptr;
+    HightLightEffectHelper *m_hightLightEffectHelper = nullptr;
+
 };

 class ResultItemStyle : public QProxyStyle
--- a/frontend/view/result-view.cpp
+++ b/frontend/view/result-view.cpp
@ -168,6 +168,7 @@ ResultView::ResultView(const QString &plugin_id, QWidget *parent) : QTreeView(pa
 {
 //    setStyle(ResultItemStyle::getStyle());
    this->setFrameShape(QFrame::NoFrame);
+    this->viewport()->setAttribute(Qt::WA_AcceptTouchEvents);
    this->viewport()->setAutoFillBackground(false);
    this->setIconSize(QSize(VIEW_ICON_SIZE, VIEW_ICON_SIZE));
    this->setRootIsDecorated(false);
@ -181,6 +182,9 @@ ResultView::ResultView(const QString &plugin_id, QWidget *parent) : QTreeView(pa
    m_plugin_id = plugin_id;
    m_styleDelegate = new ResultViewDelegate(this);
    this->setItemDelegate(m_styleDelegate);
+    m_touchTimer = new QTimer(this);
+    m_touchTimer->setSingleShot(true);
+    m_touchTimer->setInterval(100);
 }

 bool ResultView::isSelected()
@ -190,13 +194,23 @@ bool ResultView::isSelected()

 int ResultView::showHeight()
 {
-    int height;
-    int rowheight = this->rowHeight(this->model()->index(0, 0, QModelIndex()));
+    int height(0);
+//    int rowheight = this->rowHeight(this->model()->index(0, 0, QModelIndex()));
+//    if (this->isExpanded()) {
+//        height = m_count * rowheight;
+//    } else {
+//        int show_count = m_count > NUM_LIMIT_SHOWN_DEFAULT ? NUM_LIMIT_SHOWN_DEFAULT : m_count;
+//        height = show_count * rowheight;
+//    }
    if (this->isExpanded()) {
-        height = m_count * rowheight;
+        for (int i = 0; i<m_count; ++i) {
+            height += this->rowHeight(this->model()->index(i, 0, QModelIndex()));
+        }
    } else {
        int show_count = m_count > NUM_LIMIT_SHOWN_DEFAULT ? NUM_LIMIT_SHOWN_DEFAULT : m_count;
-        height = show_count * rowheight;
+        for (int i = 0; i<show_count; ++i) {
+            height += this->rowHeight(this->model()->index(i, 0, QModelIndex()));
+        }
    }
    return height;
 }
@ -268,10 +282,10 @@ void ResultView::onRowSelectedSlot(const QModelIndex &index)
 void ResultView::onItemListChanged(const int &count)
 {
    m_count = count;
-    Q_EMIT this->listLengthChanged(count);
    QModelIndex index = this->currentIndex();
    m_model->refresh();
    this->setCurrentIndex(index);
+    Q_EMIT this->listLengthChanged(count);
 }

 void ResultView::setExpanded(const bool &is_expanded)
@ -334,6 +348,53 @@ void ResultView::mouseMoveEvent(QMouseEvent *event)
    return QTreeView::mouseMoveEvent(event);
 }

+/**
+ * @brief Translates touch events on the viewport into synthesized mouse events.
+ *
+ * TouchBegin and TouchUpdate are sent to parent() as MouseButtonPress and
+ * MouseMove respectively. A TouchEnd that arrives while the touch timer is
+ * still running is delivered to the viewport as a press followed by a release.
+ * All synthesized events are tagged Qt::MouseEventSynthesizedByApplication.
+ *
+ * @param event Event delivered to the viewport.
+ * @return true when a touch event was consumed here, otherwise the result of
+ *         QTreeView::viewportEvent().
+ */
+bool ResultView::viewportEvent(QEvent *event)
+{
+    const QEvent::Type type = event->type();
+    if (type != QEvent::TouchBegin && type != QEvent::TouchEnd && type != QEvent::TouchUpdate) {
+        return QTreeView::viewportEvent(event);
+    }
+
+    QTouchEvent *touchEvent = dynamic_cast<QTouchEvent *>(event);
+    // Without a touch point there is no position to synthesize a mouse event from.
+    if (!touchEvent || touchEvent->touchPoints().isEmpty()) {
+        return QTreeView::viewportEvent(event);
+    }
+
+    const QPointF localPos = touchEvent->touchPoints().at(0).pos();
+    const QPoint localPoint = localPos.toPoint();
+    const QPointF windowPos = this->mapTo(this->window(), localPoint);
+    const QPointF screenPos = this->mapToGlobal(localPoint);
+
+    // Builds a left-button mouse event at the touch position and sends it synchronously.
+    auto sendMouseEvent = [&](QEvent::Type mouseType, QObject *receiver) {
+        QMouseEvent mouseEvent(mouseType, localPos, windowPos, screenPos,
+                               Qt::LeftButton, Qt::LeftButton, Qt::NoModifier,
+                               Qt::MouseEventSynthesizedByApplication);
+        QApplication::sendEvent(receiver, &mouseEvent);
+    };
+
+    if (type == QEvent::TouchBegin) {
+        qDebug() << "TouchBegin==============";
+        sendMouseEvent(QEvent::MouseButtonPress, parent());
+        m_touchTimer->start();
+        event->accept();
+    } else if (type == QEvent::TouchEnd) {
+        qDebug() << "touchend==============" << m_touchTimer->remainingTime();
+        // The touch ended while the timer was still running: deliver press + release to the viewport.
+        if (m_touchTimer->remainingTime() > 0) {
+            sendMouseEvent(QEvent::MouseButtonPress, this->viewport());
+            sendMouseEvent(QEvent::MouseButtonRelease, this->viewport());
+        }
+    } else {
+        qDebug() << "touchupdate==============";
+        sendMouseEvent(QEvent::MouseMove, parent());
+    }
+    return true;
+}
+
 void ResultView::initConnections()
 {
    connect(this, &ResultView::startSearch, [ = ](const QString &keyword) {
--- a/frontend/view/result-view.h
+++ b/frontend/view/result-view.h
@ -37,6 +37,7 @@ protected:
    void mousePressEvent(QMouseEvent *event);
    void mouseReleaseEvent(QMouseEvent *event);
    void mouseMoveEvent(QMouseEvent *event);
+    bool viewportEvent(QEvent *event);

 private:
    void initConnections();
@ -47,6 +48,7 @@ private:
    int m_count = 0;
    QModelIndex m_tmpCurrentIndex;
    QModelIndex m_tmpMousePressIndex;
+    QTimer *m_touchTimer;

 Q_SIGNALS:
    void startSearch(const QString &);
--- a/frontend/view/web-search-view.cpp
+++ b/frontend/view/web-search-view.cpp
@ -18,6 +18,7 @@
 * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
 *
 */
+#include <QDBusReply>
 #include "web-search-view.h"
 #define MAIN_MARGINS 0,0,0,0
 #define MAIN_SPACING 0
@ -97,6 +98,30 @@ void WebSearchView::LaunchBrowser()
    } else { //默认值
        address = "http://baidu.com/s?word=" + m_keyWord ; //百度
    }
+    bool res(false);
+    QDBusInterface * appLaunchInterface = new QDBusInterface("com.kylin.AppManager",
+                                                             "/com/kylin/AppManager",
+                                                             "com.kylin.AppManager",
+                                                             QDBusConnection::sessionBus());
+    if(!appLaunchInterface->isValid()) {
+        qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
+        res = false;
+    } else {
+        appLaunchInterface->setTimeout(10000);
+        QDBusReply<bool> reply = appLaunchInterface->call("LaunchDefaultAppWithUrl", address);
+        if(reply.isValid()) {
+            res = reply;
+        } else {
+            qWarning() << "SoftWareCenter dbus called failed!";
+            res = false;
+        }
+    }
+    if(appLaunchInterface) {
+        delete appLaunchInterface;
+    }
+    appLaunchInterface = NULL;
+    if (res)
+        return;
    QDesktopServices::openUrl(address);
 }

--- a/libchinese-segmentation/LICENSE
+++ b/libchinese-segmentation/LICENSE
@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- a/libchinese-segmentation/chinese-segmentation-private.h
+++ b/libchinese-segmentation/chinese-segmentation-private.h
@ -0,0 +1,33 @@
+#ifndef CHINESESEGMENTATIONPRIVATE_H
+#define CHINESESEGMENTATIONPRIVATE_H
+
+#include "chinese-segmentation.h"
+#include "cppjieba/Jieba.hpp"
+#include "cppjieba/KeywordExtractor.hpp"
+
+// Private implementation of ChineseSegmentation: owns the cppjieba::Jieba engine and forwards every call to it.
+class ChineseSegmentationPrivate
+{
+public:
+    explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
+    ~ChineseSegmentationPrivate();
+    // m_jieba is a raw owning pointer released by the destructor; a copy would delete it twice.
+    ChineseSegmentationPrivate(const ChineseSegmentationPrivate&) = delete;
+    ChineseSegmentationPrivate& operator=(const ChineseSegmentationPrivate&) = delete;
+
+    vector<KeyWord> callSegment(const string& sentence);                           // keyword extraction (Jieba::extractor)
+    vector<string> callMixSegmentCutStr(const string& sentence);                   // Jieba::Cut, word text only
+    vector<Word> callMixSegmentCutWord(const string& sentence);                    // Jieba::Cut, full Word records
+    string lookUpTagOfWord(const string& word);                                    // Jieba::LookupTag
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);  // Jieba::Tag
+    vector<Word> callFullSegment(const string& sentence);                          // Jieba::CutAll
+    vector<Word> callQuerySegment(const string& sentence);                         // Jieba::CutForSearch
+    vector<Word> callHMMSegment(const string& sentence);                           // Jieba::CutHMM
+    vector<Word> callMPSegment(const string& sentence);                            // Jieba::CutSmall
+
+private:
+    cppjieba::Jieba *m_jieba = nullptr;   // owned: created in the constructor, deleted in the destructor
+    ChineseSegmentation *q = nullptr;     // non-owning pointer to the public object passed as parent
+};
+
+#endif // CHINESESEGMENTATIONPRIVATE_H
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@ -19,72 +19,144 @@
 *
 */
 #include "chinese-segmentation.h"
-#include <QFileInfo>
-#include <QDebug>
-static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
-QMutex  ChineseSegmentation::m_mutex;
+#include "chinese-segmentation-private.h"

-ChineseSegmentation::ChineseSegmentation() {
-    const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
+ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent) // Stores parent in q and creates the Jieba engine held in m_jieba.
+{ // NOTE(review): DICT_PATH, USER_DICT_PATH and IDF_DICT_PATH are not declared in this function; presumably they come from an included header or the build system (confirm).
+    //const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
    const char * const  HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
-    const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
-    const char * const  IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
+    //const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
+    //const char * const  IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
    const char * const  STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
    m_jieba = new cppjieba::Jieba(DICT_PATH,
                                  HMM_PATH,
                                  USER_DICT_PATH,
-                                  IDF_PATH,
+                                  IDF_DICT_PATH,
                                  STOP_WORD_PATH,
                                  "");
 }

-ChineseSegmentation::~ChineseSegmentation() {
+ChineseSegmentationPrivate::~ChineseSegmentationPrivate() { // Deletes the Jieba engine held in m_jieba and resets the pointer.
    if(m_jieba)
        delete m_jieba;
    m_jieba = nullptr;
 }

-ChineseSegmentation *ChineseSegmentation::getInstance() {
-    QMutexLocker locker(&m_mutex);
-    if(!global_instance_chinese_segmentation) {
-        global_instance_chinese_segmentation = new ChineseSegmentation;
-    }
-    return global_instance_chinese_segmentation;
-}
-
-QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
-//    std::string s;
-//    s = str.toStdString();
-//    str.squeeze();
-
+vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) { // Runs the Jieba keyword extractor on sentence and returns the keywords it produced.
    const size_t topk = -1;
-    std::vector<cppjieba::KeyWord> keywordres;
-    ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
-    std::string().swap(s);
-    QVector<SKeyWord> vecNeeds;
-    convert(keywordres, vecNeeds);
+    vector<KeyWord> keywordres;
+    ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence, keywordres, topk); // topk is size_t(-1), i.e. the largest size_t value; presumably means "no limit" (confirm against Extract)

-    keywordres.clear();
-//    keywordres.shrink_to_fit();
-    return vecNeeds;
+    return keywordres;

 }

-std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
-
-    const size_t topk = -1;
-    std::vector<cppjieba::KeyWord> keywordres;
-    ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
-
+vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence) // Segments sentence with Jieba::Cut and returns each word as a plain string.
+{
+    vector<string> keywordres;
+    ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
    return keywordres;
 }

-void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
-    for(auto i : keywordres) {
-        SKeyWord temp;
-        temp.word = i.word;
-        temp.offsets = QVector<size_t>::fromStdVector(i.offsets);
-        temp.weight = i.weight;
-        kw.append(temp);
-    }
+/**
+ * Segments sentence with cppjieba::Jieba::Cut, using the overload that
+ * fills a vector of Word records.
+ *
+ * @param sentence text to segment
+ * @return the Word records written by Cut
+ */
+vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
+{
+    vector<Word> words;
+    m_jieba->Cut(sentence, words);
+    return words;
+}
+
+/**
+ * Asks cppjieba::Jieba::LookupTag for the tag of a single word.
+ *
+ * @param word word to look up
+ * @return the tag string reported by LookupTag
+ */
+string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
+{
+    string tag = m_jieba->LookupTag(word);
+    return tag;
+}
+
+/**
+ * Passes sentence to cppjieba::Jieba::Tag and returns the pairs it wrote.
+ *
+ * @param sentence text to process
+ * @return the (string, string) pairs filled in by Tag
+ */
+vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
+{
+    vector<pair<string, string>> taggedWords;
+    m_jieba->Tag(sentence, taggedWords);
+    return taggedWords;
+}
+
+/**
+ * Segments sentence with cppjieba::Jieba::CutAll.
+ *
+ * @param sentence text to segment
+ * @return the Word records written by CutAll
+ */
+vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
+{
+    vector<Word> words;
+    m_jieba->CutAll(sentence, words);
+    return words;
+}
+
+/**
+ * Segments sentence with cppjieba::Jieba::CutForSearch.
+ *
+ * @param sentence text to segment
+ * @return the Word records written by CutForSearch
+ */
+vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
+{
+    vector<Word> words;
+    m_jieba->CutForSearch(sentence, words);
+    return words;
+}
+
+/**
+ * Segments sentence with cppjieba::Jieba::CutHMM.
+ *
+ * @param sentence text to segment
+ * @return the Word records written by CutHMM
+ */
+vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
+{
+    vector<Word> words;
+    m_jieba->CutHMM(sentence, words);
+    return words;
+}
+
+/**
+ * Segments sentence with cppjieba::Jieba::CutSmall, passing 512 as the
+ * maximum word length argument.
+ *
+ * @param sentence text to segment
+ * @return the Word records written by CutSmall
+ */
+vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
+{
+    vector<Word> words;
+    size_t lengthLimit = 512;
+    m_jieba->CutSmall(sentence, words, lengthLimit);
+    return words;
+}
+
+/**
+ * Returns the shared ChineseSegmentation object.
+ * The object is allocated once, when the function-local static below is
+ * initialised on the first call, and this function never deletes it.
+ */
+ChineseSegmentation *ChineseSegmentation::getInstance()
+{
+    static ChineseSegmentation *instance = new ChineseSegmentation;
+    return instance;
+}
+
+vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence) // Forwards to ChineseSegmentationPrivate::callSegment through d.
+{
+    return d->callSegment(sentence);
+}
+
+vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence) // Forwards to ChineseSegmentationPrivate::callMixSegmentCutStr through d.
+{
+    return d->callMixSegmentCutStr(sentence);
+}
+
+vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str) // Forwards to ChineseSegmentationPrivate::callMixSegmentCutWord through d.
+{
+    return d->callMixSegmentCutWord(str);
+}
+
+string ChineseSegmentation::lookUpTagOfWord(const string &word) // Forwards to ChineseSegmentationPrivate::lookUpTagOfWord through d.
+{
+    return d->lookUpTagOfWord(word);
+}
+
+vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence) // Forwards to ChineseSegmentationPrivate::getTagOfWordsInSentence through d.
+{
+    return d->getTagOfWordsInSentence(sentence);
+}
+
+vector<Word> ChineseSegmentation::callFullSegment(const string &sentence) // Forwards to ChineseSegmentationPrivate::callFullSegment through d.
+{
+    return d->callFullSegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence) // Forwards to ChineseSegmentationPrivate::callQuerySegment through d.
+{
+    return d->callQuerySegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence) // Forwards to ChineseSegmentationPrivate::callHMMSegment through d.
+{
+    return d->callHMMSegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callMPSegment(const string &sentence) // Forwards to ChineseSegmentationPrivate::callMPSegment through d.
+{
+    return d->callMPSegment(sentence);
+}
+
+ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate) // Allocates the private implementation; no parent argument is passed, so its default (nullptr) is used.
+{ // NOTE(review): no delete of d is visible in this file; presumably acceptable because the only instance is the one created in getInstance() (confirm).
 }
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@ -22,42 +22,95 @@
 #define CHINESESEGMENTATION_H

 #include "libchinese-segmentation_global.h"
-#include "cppjieba/Jieba.hpp"
-//#include "Logging.hpp"
-//#include "LocalVector.hpp"
-//#include "cppjieba/QuerySegment.hpp"
-#include "cppjieba/KeywordExtractor.hpp"
-#include <QVector>
-#include <QString>
-#include <QDebug>
-#include <QMutex>
-
-struct SKeyWord {
-    std::string word;
-    QVector<size_t> offsets;
-    double weight;
-    ~SKeyWord() {
-        word = std::move("");
-        offsets.clear();
-        offsets.shrink_to_fit();
-    }
-};
+#include "common-struct.h"

+class ChineseSegmentationPrivate;
 class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
 public:
    static ChineseSegmentation *getInstance();
-    QVector<SKeyWord> callSegement(std::string s);
-    std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
+
+    /**
+     * @brief ChineseSegmentation::callSegment
+     * Extracts keywords with the extractor: the sentence is first segmented with the Mix method, then
+     * keywords are picked using the IDF dictionary; only keywords of two or more characters are included.
+     * @param sentence the sentence to extract keywords from
+     * @return vector<KeyWord> container holding the information of the extracted keywords
+     */
+    vector<KeyWord> callSegment(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutStr
+     * Segments with the Mix method: a first pass with the maximum-probability method (MP), then a second pass with the hidden
+     * Markov model (HMM); cuts both dictionary words and out-of-vocabulary words, so the result is fairly accurate.
+     * @param sentence the sentence to segment
+     * @return vector<string> container holding only the text of each word after segmentation
+     */
+    vector<string> callMixSegmentCutStr(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutWord
+     * Same functionality as callMixSegmentCutStr
+     * @param str the sentence to segment
+     * @return vector<Word> container holding the full information of each word after segmentation
+     */
+    vector<Word> callMixSegmentCutWord(const string& str);
+
+    /**
+     * @brief ChineseSegmentation::lookUpTagOfWord
+     * Looks up the part-of-speech tag of word
+     * @param word the word whose part-of-speech tag is looked up
+     * @return string the part-of-speech tag of word
+     */
+    string lookUpTagOfWord(const string& word);
+
+    /**
+     * @brief ChineseSegmentation::getTagOfWordsInSentence
+     * Segments with Mix and then gets the part-of-speech tag of each word
+     * @param sentence the sentence to segment
+     * @return vector<pair<string, string>> the text of each segmented word (first) and its part-of-speech tag (second)
+     */
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callFullSegment
+     * Segments with the Full method, which cuts out every word found in the dictionary.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each word after segmentation
+     */
+    vector<Word> callFullSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callQuerySegment
+     * Segments with the Query method: Mix first, then Full on long words; the most precise result, but also the largest number of words
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each word after segmentation
+     */
+    vector<Word> callQuerySegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callHMMSegment
+     * Segments with the hidden Markov model (HMM)
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each word after segmentation
+     */
+    vector<Word> callHMMSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMPSegment
+     * Segments with the maximum-probability method (MP)
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each word after segmentation
+     */
+    vector<Word> callMPSegment(const string& sentence);

 private:
    explicit ChineseSegmentation();
-    ~ChineseSegmentation();
-    void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
+    ~ChineseSegmentation() = default;
+    ChineseSegmentation(const ChineseSegmentation&) = delete; // copying is disabled
+    ChineseSegmentation& operator =(const ChineseSegmentation&) = delete; // copy assignment is disabled

 private:
-    static QMutex m_mutex;
-    cppjieba::Jieba *m_jieba;
-
+    ChineseSegmentationPrivate *d = nullptr; // pointer to the private implementation class (only forward-declared in this header)
 };

 #endif // CHINESESEGMENTATION_H
--- a/libchinese-segmentation/common-struct.h
+++ b/libchinese-segmentation/common-struct.h
@ -0,0 +1,52 @@
+#ifndef COMMONSTRUCT_H
+#define COMMONSTRUCT_H
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+/**
+ * @brief The KeyWord struct
+ *
+ * @property word the content of keyword
+ * @property offsets the Unicode offsets, can be used to check the word pos in a sentence
+ * @property weight the weight of the keyword
+ *
+ * No destructor is declared on purpose: string and vector release their own
+ * storage, and a user-declared destructor would suppress the implicit move
+ * constructor and move assignment, so containers of KeyWord would deep-copy
+ * every element when they grow or are sorted.
+ */
+struct KeyWord {
+    string word;
+    vector<size_t> offsets;
+    double weight;
+};
+
+
+/**
+ * @brief The Word struct
+ *
+ * @property word the content of word
+ * @property offset the offset of the word (absolute pos, Chinese 3, English 1), can be used to check the word pos in a sentence
+ * @property unicode_offset the Unicode offset of the word
+ * @property unicode_length the Unicode length of the word
+ *
+ * No destructor is declared on purpose: a user-declared one would suppress the implicit move operations.
+ */
+struct Word {
+    string word;
+    uint32_t offset;
+    uint32_t unicode_offset;
+    uint32_t unicode_length;
+    Word(const string& w, uint32_t o)
+        : word(w), offset(o), unicode_offset(0), unicode_length(0) { // Unicode fields are zeroed rather than left indeterminate
+    }
+    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
+        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+    }
+}; // struct Word
+
+
+#endif // COMMONSTRUCT_H
--- a/libchinese-segmentation/cppjieba/DatTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DatTrie.hpp
@ -13,7 +13,12 @@

 #include "limonp/Md5.hpp"
 #include "Unicode.hpp"
-#include "darts.h"
+//#define USE_DARTS_CLONE
+#ifdef USE_DARTS_CLONE
+#include "../storage-base/darts-clone/darts.h"
+#else
+#include "../storage-base/cedar/cedar.h"
+#endif

 namespace cppjieba {

@ -60,20 +65,6 @@ inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
    return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
 }

-struct DatMemElem {
-    double weight = 0.0;
-    char tag[8] = {};
-
-    void SetTag(const string & str) {
-        memset(&tag[0], 0, sizeof(tag));
-        strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
-    }
-
-    string GetTag() const {
-        return &tag[0];
-    }
-};
-
 struct PinYinMemElem {
    char tag[6] = {};

@ -90,14 +81,11 @@ struct PinYinMemElem {
 inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
    return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
 }
-
-struct DatDag {
-    limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
-    double max_weight;
-    int max_next;
-};
-
+#ifdef USE_DARTS_CLONE
 typedef Darts::DoubleArray JiebaDAT;
+#else
+typedef cedar::da<int, -1, -2, false> JiebaDAT;
+#endif


 struct CacheFileHeader {
@ -124,6 +112,7 @@ public:
    }

    const DatMemElem * Find(const string & key) const {
+#ifdef USE_DARTS_CLONE
        JiebaDAT::result_pair_type find_result;
        dat_.exactMatchSearch(key.c_str(), find_result);

@ -132,9 +121,16 @@ public:
        }

        return &elements_ptr_[ find_result.value ];
+#else
+        int result = dat_.exactMatchSearch<int>(key.c_str());
+        if (result < 0)
+            return nullptr;
+        return &elements_ptr_[result];
+#endif
    }

    const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
+#ifdef USE_DARTS_CLONE
        JiebaDAT::result_pair_type find_result;
        dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);

@ -143,9 +139,16 @@ public:
        }

        return idf_elements_ptr_[ find_result.value ];
+#else
+        int result = dat_.exactMatchSearch<int>(key.c_str(), length, node_pos);
+        if (result < 0)
+            return -1;
+        return idf_elements_ptr_[result];
+#endif
    }

    const PinYinMemElem * PinYinFind(const string & key) const {
+#ifdef USE_DARTS_CLONE
        JiebaDAT::result_pair_type find_result;
        dat_.exactMatchSearch(key.c_str(), find_result);

@ -154,6 +157,12 @@ public:
        }

        return &pinyin_elements_ptr_[ find_result.value ];
+#else
+        int result = dat_.exactMatchSearch<int>(key.c_str());
+        if (result < 0)
+            return nullptr;
+        return &pinyin_elements_ptr_[result];
+#endif
    }

    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
@ -259,7 +268,7 @@ public:
            max_weight[i] = -3.14e+100;
        }
        int max_next[str_size];//存放动态规划后的分词结果
-        memset(max_next,-1,str_size);
+        //memset(max_next,-1,str_size);

        double val(0);
        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
@ -367,7 +376,7 @@ public:

        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem)  + header.dat_size * dat_.unit_size());
        elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
-        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
+        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
@ -398,7 +407,7 @@ public:

        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double)  + header.dat_size * dat_.unit_size());
        idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
-        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
+        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
@ -429,7 +438,7 @@ public:

        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(PinYinMemElem)  + header.dat_size * dat_.unit_size());
        pinyin_elements_ptr_ = (const PinYinMemElem *)(mmap_addr_ + sizeof(header));
-        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(PinYinMemElem) * elements_num_;
+        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(PinYinMemElem) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
@ -469,7 +478,6 @@ private:
            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
            ::umask(S_IWGRP | S_IWOTH);
            //const int fd =::mkstemp(&tmp_filepath[0]);
-            //原mkstemp用法有误，已修复--jxx20210519
            const int fd =::mkstemp((char *)tmp_filepath.data());
            qDebug() << "mkstemp :" << errno << tmp_filepath.data();
            assert(fd >= 0);
@ -518,7 +526,6 @@ private:
            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
            ::umask(S_IWGRP | S_IWOTH);
            //const int fd =::mkstemp(&tmp_filepath[0]);
-            //原mkstemp用法有误，已修复--jxx20210519
            const int fd =::mkstemp((char *)tmp_filepath.data());
            qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
            assert(fd >= 0);
--- a/libchinese-segmentation/cppjieba/DictTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DictTrie.hpp
@ -18,7 +18,6 @@ namespace cppjieba {

 using namespace limonp;

-const double MIN_DOUBLE = -3.14e+100;
 const double MAX_DOUBLE = 3.14e+100;
 const size_t DICT_COLUMN_NUM = 3;
 const char* const UNKNOWN_TAG = "";
@ -42,14 +41,14 @@ public:
        return dat_.Find(word);
    }

-    void Find(RuneStrArray::const_iterator begin,
+    void FindDatDag(RuneStrArray::const_iterator begin, // Passes the range, res and max_word_len straight through to dat_.Find.
              RuneStrArray::const_iterator end,
              vector<struct DatDag>&res,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        dat_.Find(begin, end, res, max_word_len);
    }

-    void Find(RuneStrArray::const_iterator begin,
+    void FindWordRange(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<WordRange>& words,
              size_t max_word_len = MAX_WORD_LENGTH) const {
@ -134,9 +133,9 @@ private:
        total_dict_size_ = file_size_sum;

        if (dat_cache_path.empty()) {
-            //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
-            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
+            dat_cache_path = "/tmp/" + md5 + ".dat_";//未指定词库数据文件存储位置的默认存储在tmp目录下
        }
+         dat_cache_path += VERSION;
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Dict path:" << path;
        if (dat_.InitAttachDat(dat_cache_path, md5)) {
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@ -4,7 +4,8 @@
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
-#include "DictTrie.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "Unicode.hpp"

@ -22,7 +23,7 @@ public:
                     vector<WordRange>& res, bool, size_t) const override {
        assert(dictTrie_);
        vector<struct DatDag> dags;
-        dictTrie_->Find(begin, end, dags);
+        dictTrie_->FindDatDag(begin, end, dags);
        size_t max_word_end_pos = 0;

        for (size_t i = 0; i < dags.size(); i++) {
@ -45,11 +46,19 @@ public:

    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s; // No-op override: each named parameter is only assigned to std::ignore (presumably to silence unused-parameter warnings).
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s; // No-op override: each named parameter is only assigned to std::ignore (presumably to silence unused-parameter warnings).
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
 private:
    const DictTrie* dictTrie_;
--- a/libchinese-segmentation/cppjieba/HMMModel.hpp
+++ b/libchinese-segmentation/cppjieba/HMMModel.hpp
@ -1,12 +1,18 @@
 #pragma once

 #include "limonp/StringUtil.hpp"
-
+//#define USE_CEDAR_SEGMENT // Preliminary tests with cedar: roughly 3%-5% performance loss, memory usage reduced by nearly 1 MB
+#ifdef USE_CEDAR_SEGMENT
+#include "cedar/cedar.h"
+#endif
 namespace cppjieba {

 using namespace limonp;
+#ifdef USE_CEDAR_SEGMENT
+typedef cedar::da<float, -1, -2, false> EmitProbMap;
+#else
 typedef unordered_map<Rune, double> EmitProbMap;
-
+#endif
 struct HMMModel {
    /*
     * STATUS:
@ -73,6 +79,12 @@ struct HMMModel {
    }
    double GetEmitProb(const EmitProbMap* ptMp, Rune key,
                       double defVal)const {
+#ifdef USE_CEDAR_SEGMENT
+        char str_key[8];
+        snprintf(str_key, sizeof(str_key), "%d", key);
+        float result = ptMp->exactMatchSearch<float>(str_key);
+        return result < 0 ? defVal : result;
+#else
        EmitProbMap::const_iterator cit = ptMp->find(key);

        if (cit == ptMp->end()) {
@ -80,6 +92,7 @@ struct HMMModel {
        }

        return cit->second;
+#endif
    }
    bool GetLine(ifstream& ifile, string& line) {
        while (getline(ifile, line)) {
@ -119,8 +132,13 @@ struct HMMModel {
                XLOG(ERROR) << "TransCode failed.";
                return false;
            }
-
+#ifdef USE_CEDAR_SEGMENT
+            char str_key[8];
+            snprintf(str_key, sizeof(str_key), "%d", unicode[0]);
+            mp.update(str_key, std::strlen(str_key), atof(tmp2[1].c_str()));
+#else
            mp[unicode[0]] = atof(tmp2[1].c_str());
+#endif
        }

        return true;
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@ -8,6 +8,9 @@
 #include "SegmentBase.hpp"

 namespace cppjieba {
+
+const double MIN_DOUBLE = -3.14e+100;
+
 class HMMSegment: public SegmentBase {
 public:
    HMMSegment(const HMMModel* model)
@ -59,11 +62,19 @@ public:

    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s; // No-op override: each named parameter is only assigned to std::ignore (presumably to silence unused-parameter warnings).
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s; // No-op override: each named parameter is only assigned to std::ignore (presumably to silence unused-parameter warnings).
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
 private:
    // sequential letters rule
--- a/libchinese-segmentation/cppjieba/IdfTrie.hpp
+++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp
@ -51,9 +51,9 @@ private:
        total_dict_size_ = file_size_sum;

        if (dat_cache_path.empty()) {
-            //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
-            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
+            dat_cache_path = "/tmp/" + md5 + ".dat_";//未指定词库数据文件存储位置的默认存储在tmp目录下
        }
+         dat_cache_path += VERSION;
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Idf path:" << path;
        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@ -3,6 +3,7 @@
 #include <memory>
 #include "QuerySegment.hpp"
 #include "KeywordExtractor.hpp"
+#include "segment-trie/segment-trie.h"

 namespace cppjieba {

@ -21,7 +22,7 @@ public:
          mix_seg_(&dict_trie_, &model_, stopWordPath),
          full_seg_(&dict_trie_),
          query_seg_(&dict_trie_, &model_, stopWordPath),
-          extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
+          extractor(&dict_trie_, &model_, idfPath, dat_cache_path, stopWordPath){ }
    ~Jieba() { }

    void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
@ -61,9 +62,6 @@ public:
    string LookupTag(const string &str) const {
        return mix_seg_.LookupTag(str);
    }
-    bool Find(const string& word) {
-        return nullptr != dict_trie_.Find(word);
-    }

    void ResetSeparators(const string& s) {
        //TODO
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@ -2,7 +2,8 @@

 #include <cmath>
 #include "MixSegment.hpp"
-#include "IdfTrie.hpp"
+//#include "IdfTrie.hpp"
+#include "idf-trie/idf-trie.h"

 namespace cppjieba {

@ -19,7 +20,7 @@ public:
                     const string& dat_cache_path,
                     const string& stopWordPath)
        : segment_(dictTrie, model, stopWordPath),
-        idf_trie_(idfPath,dat_cache_path){
+          idf_trie_(idfPath, dat_cache_path){
    }
    ~KeywordExtractor() {
    }
@ -64,7 +65,7 @@ public:
            if (-1 != idf) {//IDF词典查找
                itr->second.weight *= idf;
            } else {
-                itr->second.weight *= idf_trie_.idfAverage_;
+                itr->second.weight *= idf_trie_.GetIdfAverage();
            }

            itr->second.word = itr->first;
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@ -4,7 +4,8 @@
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
-#include "DictTrie.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
 #include "SegmentTagged.hpp"
 #include "PosTagger.hpp"

@ -22,20 +23,24 @@ public:
                     RuneStrArray::const_iterator end,
                     vector<WordRange>& words,
                     bool, size_t max_word_len) const override {
-//        vector<DatDag> dags;
-//        dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
-//        CalcDP(dags);//动态规划（Dynamic Programming，DP），根据DAG计算最优动态规划路径--jxx
-//        CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
-        dictTrie_->Find(begin, end, words, max_word_len);
+        dictTrie_->FindWordRange(begin, end, words, max_word_len);
    }

    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
    const DictTrie* GetDictTrie() const override {
        return dictTrie_;
@ -77,6 +82,7 @@ private:
    }
 */
 /*  倒叙方式重写CalcDP函数，初步测试未发现问题*/
+/*
    void CalcDP(vector<DatDag>& dags) const {
        double val(0);
        size_t size = dags.size();
@ -87,8 +93,6 @@ private:

            for (const auto & it : dags[size - 1 - i].nexts) {
                const auto nextPos = it.first;
-                val = dictTrie_->GetMinWeight();
-
                if (nullptr != it.second) {
                    val = it.second->weight;
                }
@ -119,7 +123,7 @@ private:
            i = next;
        }
    }
-
+*///相关功能已集成到Find函数中
    const DictTrie* dictTrie_;
    PosTagger tagger_;

--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@ -5,6 +5,10 @@
 #include "HMMSegment.hpp"
 #include "limonp/StringUtil.hpp"
 #include "PosTagger.hpp"
+#define STOP_WORDS_USE_CEDAR_SEGMENT //使用cedar初步测试性能提升3%-5%左右，内存占用降低近不明显
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+#include "cedar/cedar.h"
+#endif

 namespace cppjieba {
 class MixSegment: public SegmentTagged {
@ -73,7 +77,7 @@ public:
 //            mpSeg_.CutRuneArray(begin, end, res);
 //            return;
 //        }
-
+        std::ignore = hmm;
        vector<WordRange> words;
        assert(end >= begin);
        words.reserve(end - begin);
@ -122,6 +126,7 @@ public:

    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
+        std::ignore = hmm;
        vector<WordRange> words;
        vector<WordRange> hmmRes;
        assert(end >= begin);
@ -139,9 +144,15 @@ public:
                string str = GetStringFromRunes(s, words[i].left, words[i].right);

                if (words[i].left != words[i].right) {
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(str.c_str(), str.size())) {
+                        continue;
+                    }
+#else
                    if (stopWords_.find(str) != stopWords_.end()) {
                        continue;
                    }
+#endif
                    res[str].offsets.push_back(words[i].left->offset);
                    res[str].weight += 1.0;
                    continue;
@ -149,9 +160,15 @@ public:

                if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
                        || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(str.c_str(), str.size())) {
+                        continue;
+                    }
+#else
                    if (stopWords_.find(str) != stopWords_.end()) {
                        continue;
                    }
+#endif
                    res[str].offsets.push_back(words[i].left->offset);
                    res[str].weight += 1.0;
                    continue;
@ -181,9 +198,16 @@ public:
                //put hmm result to result
                for (size_t k = 0; k < hmmRes.size(); k++) {
                    string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
-                    if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(hmmStr.c_str(), hmmStr.size())) {
                        continue;
                    }
+#else
+                    if (/*IsSingleWord(hmmStr) || */stopWords_.find(hmmStr) != stopWords_.end()) {
+                        continue;
+                    }
+#endif
+
                    res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
                    res[hmmStr].weight += 1.0;
                }
@ -227,14 +251,21 @@ public:
        string line ;

        while (getline(ifs, line)) {
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+            stopWords_.update(line.c_str(), line.size(), 1);
+#else
            stopWords_.insert(line);
+#endif
        }

        assert(stopWords_.size());
    }
 private:
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+    cedar::da<int, -1, -2, false> stopWords_;
+#else
    unordered_set<string> stopWords_;
-
+#endif
    MPSegment mpSeg_;
    HMMSegment hmmSeg_;
    PosTagger tagger_;
--- a/libchinese-segmentation/cppjieba/PinYinTrie.hpp
+++ b/libchinese-segmentation/cppjieba/PinYinTrie.hpp
@ -63,7 +63,7 @@ public:
        return false;
    }

-    bool isMultiTone(string &word) {
+    bool isMultiTone(const string &word) {
        if (qmap_chinese2pinyin.contains(QString::fromStdString(word)))
            return true;
 //        if (map_chinese2pinyin.contains(word))
--- a/libchinese-segmentation/cppjieba/PosTagger.hpp
+++ b/libchinese-segmentation/cppjieba/PosTagger.hpp
@ -1,8 +1,9 @@
 #pragma once

 #include "limonp/StringUtil.hpp"
-#include "DictTrie.hpp"
-#include "SegmentTagged.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
+//#include "SegmentTagged.hpp"

 namespace cppjieba {
 using namespace limonp;
@ -31,10 +32,10 @@ public:

    string LookupTag(const string &str, const SegmentTagged& segment) const {
        const DictTrie * dict = segment.GetDictTrie();
-        assert(dict != NULL);
+        assert(dict != nullptr);
        const auto tmp = dict->Find(str);

-        if (tmp == NULL || tmp->GetTag().empty()) {
+        if (tmp == nullptr || tmp->GetTag().empty()) {
            RuneStrArray runes;

            if (!DecodeRunesInString(str, runes)) {
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@ -69,6 +69,7 @@ public:
                }
                cursor_ ++;
            }
+            return false;
        }

        int max_num = 0;
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@ -4,12 +4,10 @@
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
-#include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "FullSegment.hpp"
 #include "MixSegment.hpp"
 #include "Unicode.hpp"
-#include "DictTrie.hpp"

 namespace cppjieba {
 class QuerySegment: public SegmentBase {
@ -35,7 +33,7 @@ public:
                for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);

-                    if (trie_->Find(text) != NULL) {
+                    if (trie_->Find(text) != nullptr) {
                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
                        res.push_back(wr);
                    }
@ -46,7 +44,7 @@ public:
                for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);

-                    if (trie_->Find(text) != NULL) {
+                    if (trie_->Find(text) != nullptr) {
                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
                        res.push_back(wr);
                    }
@ -59,11 +57,19 @@ public:

    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
-
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
    }
 private:
    bool IsAllAscii(const RuneArray& s) const {
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@ -7,6 +7,7 @@
 #include <ostream>
 #include "limonp/LocalVector.hpp"
 #include "limonp/StringUtil.hpp"
+#include "common-struct.h"

 namespace cppjieba {

@ -15,29 +16,30 @@ using std::vector;

 typedef uint32_t Rune;

-struct KeyWord {
-    string word;
-    vector<size_t> offsets;
-    double weight;
-}; // struct Word
-
-struct Word {
-    string word;
-    uint32_t offset;
-    uint32_t unicode_offset;
-    uint32_t unicode_length;
-    Word(const string& w, uint32_t o)
-        : word(w), offset(o) {
-    }
-    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
-        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
-    }
-}; // struct Word
-
 inline std::ostream& operator << (std::ostream& os, const Word& w) {
    return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
 }

+struct DatMemElem { // Per-word dictionary record: a weight plus a short tag held in a fixed 8-byte buffer.
+    double weight = 0.0;
+    char tag[8] = {}; // zero-initialised, so an unset tag reads back as an empty string
+
+    void SetTag(const string & str) { // Stores str in tag, keeping at most sizeof(tag) - 1 bytes.
+        memset(&tag[0], 0, sizeof(tag)); // clear first so the copied bytes are always followed by a NUL
+        strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
+    }
+
+    string GetTag() const { // Returns the tag bytes up to the first NUL as a string.
+        return &tag[0];
+    }
+};
+
+struct DatDag { // Candidate transitions for one rune position, as filled in by DictTrie::FindDatDag.
+    limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts; // (next position, dictionary element); the element pointer may be nullptr
+    //double max_weight;
+    //size_t max_next;
+};
+
 struct RuneInfo {
    Rune rune;
    uint32_t offset;
@ -95,7 +97,6 @@ inline RuneArray DecodeRunesInString(const string& s) {
    return result;
 }

-//重写DecodeRunesInString函数，将实现放入函数中降低内存占用加快处理流程--jxx20210518
 inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {

    uint32_t tmp;
--- a/libchinese-segmentation/cppjieba/cppjieba.pri
+++ b/libchinese-segmentation/cppjieba/cppjieba.pri
@ -17,6 +17,27 @@ HEADERS += \
    $$PWD/SegmentBase.hpp \
    $$PWD/SegmentTagged.hpp \
    $$PWD/TextRankExtractor.hpp \
-    $$PWD/Trie.hpp \
-    $$PWD/Unicode.hpp
+#    $$PWD/Trie.hpp \
+    $$PWD/Unicode.hpp \
+    $$PWD/DatTrie.hpp \
+    $$PWD/idf-trie/idf-trie.h \
+    $$PWD/segment-trie/segment-trie.h
+
+DISTFILES += \
+    dict/README.md \
+    dict/hmm_model.utf8 \
+    dict/idf.utf8 \
+    dict/jieba.dict.utf8 \
+    dict/pos_dict/char_state_tab.utf8 \
+    dict/pos_dict/prob_emit.utf8 \
+    dict/pos_dict/prob_start.utf8 \
+    dict/pos_dict/prob_trans.utf8 \
+    dict/stop_words.utf8 \
+    dict/user.dict.utf8
+    #dict/pinyinWithoutTone.txt \
+
 include(limonp/limonp.pri)
+
+SOURCES += \
+    $$PWD/idf-trie/idf-trie.cpp \
+    $$PWD/segment-trie/segment-trie.cpp
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include "idf-trie.h"
+
+IdfTrie::IdfTrie(const vector<string> file_paths, string dat_cache_path) // Multi-file constructor: hands both arguments to StorageBase, then calls Init().
+    : StorageBase<double, false, IdfCacheFileHeader>(file_paths, dat_cache_path)
+{
+    this->Init();
+}
+
+IdfTrie::IdfTrie(string file_path, string dat_cache_path) // Single-file overload: wraps file_path in a one-element vector before delegating to StorageBase.
+: StorageBase<double, false, IdfCacheFileHeader>(vector<string>{file_path}, dat_cache_path)
+{
+    this->Init();
+}
+
+// Rebuilds the on-disk IDF cache file.
+//
+// The file is written under a temporary name next to dat_cache_file and renamed
+// onto it as the last step. Layout, in write order: IdfCacheFileHeader, one
+// double per accepted line of IDF_DICT_PATH, then the raw trie array.
+//
+// @param dat_cache_file  final path of the cache file
+// @param md5             digest copied into the header; must be exactly
+//                        sizeof(header.md5_hex) bytes long
+//
+// NOTE(review): the dictionary is always read from IDF_DICT_PATH, not from the
+// paths given to the constructor - confirm this is intended.
+void IdfTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    IdfCacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    double idf_sum(0), idf_average(0), tmp(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    // umask() is process-wide: remember the previous mask and restore it as soon
+    // as the temporary file exists instead of leaving it changed for everyone.
+    const mode_t previous_mask = umask(S_IWGRP | S_IWOTH);
+    const int fd = mkstemp(&tmp_filepath[0]);
+    umask(previous_mask);
+    assert(fd >= 0);
+    if (fd < 0) {
+        // assert() disappears with NDEBUG; never write to an invalid descriptor.
+        return;
+    }
+    fchmod(fd, 0644);
+
+    // Placeholder header; its counters are patched in once they are known.
+    write_bytes = write(fd, (const char *)&header, sizeof(IdfCacheFileHeader));
+
+    ifstream ifs(IDF_DICT_PATH);
+    string line;
+    vector<string> buf;
+
+    for (; getline(ifs, line);) {
+        if (limonp::StartsWith(line, "#") or line.empty()) {
+            continue;
+        }
+        limonp::Split(line, buf, " ");
+        if (buf.size() != 2)
+            continue;
+        // The trie value of a word is the index of its double in the element area.
+        this->Update(buf[0].c_str(), buf[0].size(), elements_num);
+        offset += sizeof(double);
+        elements_num++;
+        tmp = atof(buf[1].c_str());
+        write_bytes += write(fd, &tmp, sizeof(double));
+        idf_sum += tmp;
+    }
+    // An empty or unreadable dictionary yields zero elements; keep the average at
+    // 0 rather than storing the NaN produced by 0.0 / 0.
+    if (elements_num > 0) {
+        idf_average = idf_sum / elements_num;
+    }
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // Patch the header fields that follow md5_hex.
+    // NOTE(review): this relies on the header storing these four fields
+    // contiguously in exactly this order - confirm against CacheFileHeaderBase.
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
+    write(fd, &elements_num, sizeof(int));
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+    write(fd, &idf_average, sizeof(double));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(IdfCacheFileHeader) + offset + this->GetDataTrieTotalSize());
+
+    const auto rename_ret = rename(tmp_filepath.c_str(), dat_cache_file.c_str());
+    assert(0 == rename_ret);
+    if (0 != rename_ret) {
+        // Do not leave the orphaned temporary file behind.
+        unlink(tmp_filepath.c_str());
+    }
+}
+
+double IdfTrie::Find(const string &key) const // Returns the stored double for key, or -1 when the exact-match lookup fails.
+{
+    int result = this->ExactMatchSearch(key.c_str(), key.size());
+    if (result < 0) // a negative result is treated as "key not present"
+        return -1;
+    return this->GetElementPtr()[result]; // result is used as an index into the element array
+}
+
+double IdfTrie::GetIdfAverage() const // Returns the idf_average field of the cache-file header.
+{
+    return this->GetCacheFileHeaderPtr()->idf_average;
+}
+
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef IdfTrie_H
+#define IdfTrie_H
+
+#include "storage-base.hpp"
+
+const char * const  IDF_DICT_PATH = "/usr/share/ukui-search/res/dict/idf.utf8"; // dictionary file read by IdfTrie::LoadSourceFile
+
+struct IdfCacheFileHeader : CacheFileHeaderBase // Cache-file header extended with the IDF average.
+{
+    double idf_average = 0; // value handed out by IdfTrie::GetIdfAverage()
+};
+
+class IdfTrie : public StorageBase<double, false, IdfCacheFileHeader> // Word -> IDF value lookup built on StorageBase with double elements.
+{
+public:
+    IdfTrie(const vector<string> file_paths, string dat_cache_path); // forwards both arguments to StorageBase, then calls Init()
+    IdfTrie(string file_path, string dat_cache_path); // single-file overload of the constructor above
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override; // writes the cache file at dat_cache_file from IDF_DICT_PATH
+    double Find(const string &key) const; // stored value for key, or -1 when key is not found
+    double GetIdfAverage() const; // idf_average stored in the cache-file header
+
+private:
+
+};
+
+#endif // IdfTrie_H
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include <cmath>
+#include "segment-trie.h"
+
+DictTrie::DictTrie(const vector<string> file_paths, string dat_cache_path) // Multi-file constructor: hands both arguments to StorageBase, then calls Init().
+    : StorageBase<DatMemElem, false, DictCacheFileHeader>(file_paths, dat_cache_path)
+{
+    this->Init();
+}
+
+DictTrie::DictTrie(const string &dict_path, const string &user_dict_paths, const string &dat_cache_path) // Two-path overload: passes {dict_path, user_dict_paths} to StorageBase as a two-element vector.
+    : StorageBase<DatMemElem, false, DictCacheFileHeader>(vector<string>{dict_path, user_dict_paths}, dat_cache_path)
+{
+    this->Init();
+}
+
+// Rebuilds the on-disk segmentation-dictionary cache file.
+//
+// The file is written under a temporary name next to dat_cache_file and renamed
+// onto it as the last step. Layout, in write order: DictCacheFileHeader, one
+// DatMemElem per accepted dictionary line (default dictionary first, then the
+// user dictionary), then the raw trie array.
+//
+// @param dat_cache_file  final path of the cache file
+// @param md5             digest copied into the header; must be exactly
+//                        sizeof(header.md5_hex) bytes long
+void DictTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    DictCacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    // umask() is process-wide: remember the previous mask and restore it as soon
+    // as the temporary file exists instead of leaving it changed for everyone.
+    const mode_t previous_mask = umask(S_IWGRP | S_IWOTH);
+    const int fd = mkstemp(&tmp_filepath[0]);
+    umask(previous_mask);
+    assert(fd >= 0);
+    if (fd < 0) {
+        // assert() disappears with NDEBUG; never write to an invalid descriptor.
+        return;
+    }
+    fchmod(fd, 0644);
+
+    // Placeholder header; its counters are patched in once they are known.
+    write_bytes = write(fd, (const char *)&header, sizeof(DictCacheFileHeader));
+
+    this->PreLoad(); // m_freq_sum must be known before any weight is computed
+    this->LoadDefaultDict(fd, write_bytes, offset, elements_num);
+    this->LoadUserDict(fd, write_bytes, offset, elements_num);
+
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // Patch the header fields that follow md5_hex.
+    // NOTE(review): this relies on the header storing these four fields
+    // contiguously in exactly this order - confirm against CacheFileHeaderBase.
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
+    write(fd, &elements_num, sizeof(int));
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+    write(fd, &m_min_weight, sizeof(double));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(DictCacheFileHeader) + offset + this->GetDataTrieTotalSize());
+
+    const auto rename_ret = rename(tmp_filepath.c_str(), dat_cache_file.c_str());
+    assert(0 == rename_ret);
+    if (0 != rename_ret) {
+        // Do not leave the orphaned temporary file behind.
+        unlink(tmp_filepath.c_str());
+    }
+}
+
+const DatMemElem * DictTrie::Find(const string &key) const // Returns a pointer to the element stored for key, or nullptr when the exact-match lookup fails.
+{
+    int result = this->ExactMatchSearch(key.c_str(), key.size());
+    if (result < 0) // a negative result is treated as "key not present"
+        return nullptr;
+    return &this->GetElementPtr()[result]; // result is used as an index into the element array
+}
+
+
+
+void DictTrie::FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<DatDag> &res, size_t max_word_len) const { // Rebuilds res with one DatDag per rune of [begin, end), listing the dictionary matches that start at that rune.
+
+    res.clear();
+    res.resize(end - begin);
+
+    string text_str;
+    EncodeRunesToString(begin, end, text_str);
+
+    static const size_t max_num = 128; // capacity of the result buffer handed to CommonPrefixSearch
+    result_pair_type result_pairs[max_num] = {};
+
+    for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) { // i counts runes; begin_pos is the corresponding byte offset into text_str
+
+        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); // NOTE(review): the loop below assumes the return value never exceeds max_num - confirm in StorageBase
+
+        res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr)); // slot 0: single-rune step, element stays nullptr unless a one-rune match is found
+
+        for (std::size_t idx = 0; idx < num_results; ++idx) {
+            auto & match = result_pairs[idx];
+
+            if ((match.value < 0) || ((size_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) { // skip values that cannot index the element array
+                continue;
+            }
+
+            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+
+            if (char_num > max_word_len) { // match is longer than the caller allows
+                continue;
+            }
+
+            const DatMemElem * pValue = &this->GetElementPtr()[match.value];
+
+            if (1 == char_num) { // one-rune match: attach its element to slot 0 instead of adding a second entry
+                res[i].nexts[0].second = pValue;
+                continue;
+            }
+
+            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
+        }
+
+        begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune); // presumably the UTF-8 byte length of the current rune - confirm in limonp
+    }
+}
+
+// Segments the runes in [begin, end) and appends the chosen word ranges to words.
+//
+// Positions are processed from the last rune to the first. For each position
+// the dictionary words starting there are looked up, and the continuation with
+// the highest accumulated weight is remembered (max_weight / max_next). The
+// final forward walk over max_next emits the resulting ranges.
+//
+// @param begin, end     rune range to segment; an empty range appends nothing
+// @param words          output vector; results are appended, never cleared
+// @param max_word_len   matches spanning more runes than this are ignored
+//
+// Changes against the previous implementation:
+//  * the two variable-length stack arrays (a compiler extension, unbounded stack
+//    use, zero-length for empty input) are replaced by vectors;
+//  * a position for which no dictionary match was accepted now always falls
+//    back to a single-rune step, so max_next can no longer be read
+//    uninitialised when every match is filtered out;
+//  * the number of results is clamped to the size of the result buffer.
+void DictTrie::FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange> &words, size_t max_word_len) const {
+
+    if (end <= begin) {
+        return;
+    }
+    const size_t str_size = end - begin;
+
+    string text_str;
+    EncodeRunesToString(begin, end, text_str);
+
+    static const size_t max_num = 128;
+    result_pair_type result_pairs[max_num] = {};        // dictionary lookup results
+    vector<double> max_weight(str_size, -3.14e+100);    // best accumulated weight of the path starting at each rune
+    vector<size_t> max_next(str_size, 0);               // rune index at which the best word starting here ends (exclusive)
+
+    size_t begin_pos = text_str.size();
+    for (size_t i = 0; i < str_size; i++) {
+        const size_t pos = str_size - i - 1;             // walk backwards over the runes
+        begin_pos -= (begin + pos)->len;                 // byte offset of rune pos inside text_str
+        bool linked = false;                             // becomes true once max_next[pos] has been chosen
+
+        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
+        if (num_results > max_num) {
+            num_results = max_num;                       // never read past the result buffer
+        }
+
+        for (std::size_t idx = 0; idx < num_results; ++idx) {
+            const auto & match = result_pairs[idx];
+            if ((match.value < 0) || ((uint32_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) {
+                continue;
+            }
+            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+            if ((0 == char_num) || (char_num > max_word_len)) {
+                continue;
+            }
+            const size_t next = pos + char_num;
+            if (next > str_size) {
+                continue;
+            }
+
+            double val = this->GetElementPtr()[match.value].weight;
+            if (next < str_size) {
+                val += max_weight[next];
+            }
+            if (val > max_weight[pos]) {
+                max_weight[pos] = val;
+                max_next[pos] = next;
+                linked = true;
+            }
+        }
+
+        if (!linked) {
+            // Nothing usable in the dictionary: emit this rune on its own, weighted
+            // with the minimum dictionary weight.
+            double val = GetMinWeight();
+            if (pos + 1 < str_size) {
+                val += max_weight[pos + 1];
+            }
+            max_weight[pos] = val;
+            max_next[pos] = pos + 1;
+        }
+    }
+
+    for (size_t i = 0; i < str_size;) {                  // collect the ranges along the best path
+        assert(max_next[i] > i);
+        assert(max_next[i] <= str_size);
+        WordRange wr(begin + i, begin + max_next[i] - 1);
+        words.push_back(wr);
+        i = max_next[i];
+    }
+}
+
+bool DictTrie::IsUserDictSingleChineseWord(const Rune &word) const { // True when word is in m_user_dict_single_chinese_word (filled by LoadUserDict).
+    return IsIn(m_user_dict_single_chinese_word, word);
+}
+
+void DictTrie::PreLoad() // Adds the second field of every three-field line of DICT_PATH to m_freq_sum.
+{
+    ifstream ifs(DICT_PATH);
+    string line;
+    vector<string> buf;
+
+    for (; getline(ifs, line);) {
+        if (limonp::StartsWith(line, "#") or line.empty()) { // skip '#' lines and empty lines
+            continue;
+        }
+        limonp::Split(line, buf, " ");
+        if (buf.size() != 3) // only lines with exactly three space-separated fields are counted
+            continue;
+        m_freq_sum += atof(buf[1].c_str()); // accumulates into the member; it is not reset here
+    }
+}
+
+void DictTrie::LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num) // Inserts every three-field line of DICT_PATH into the trie and appends its DatMemElem to fd, updating the three counters.
+{
+    ifstream ifs(DICT_PATH);
+    string line;
+    vector<string> buf;
+
+    for (; getline(ifs, line);) {
+        if (limonp::StartsWith(line, "#") or line.empty()) { // skip '#' lines and empty lines
+            continue;
+        }
+        limonp::Split(line, buf, " ");
+        if (buf.size() != 3) // expected fields: word, frequency, tag
+            continue;
+        DatMemElem node_info;
+        node_info.weight = log(atof(buf[1].c_str()) / m_freq_sum); // weight = log(frequency / m_freq_sum); m_freq_sum is filled by PreLoad()
+        node_info.SetTag(buf[2]);
+        this->Update(buf[0].c_str(), buf[0].size(), elements_num); // trie value = index of this element in the element area
+        offset += (sizeof(DatMemElem));
+        elements_num++;
+        if (m_min_weight > node_info.weight) { // track the smallest weight seen in the default dictionary
+            m_min_weight = node_info.weight;
+        }
+        write_bytes += write(fd, &node_info, sizeof(DatMemElem)); // NOTE(review): a failed write() returns -1 and is added to the byte count unchecked
+    }
+}
+
+void DictTrie::LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num) // Same as LoadDefaultDict but for USER_DICT_PATH; additionally records single-character user words.
+{
+    ifstream ifs(USER_DICT_PATH);
+    string line;
+    vector<string> buf;
+    for (; getline(ifs, line);) {
+        if (limonp::StartsWith(line, "#") or line.empty()) { // skip '#' lines and empty lines
+            continue;
+        }
+        limonp::Split(line, buf, " ");
+        if (buf.size() != 3) // expected fields: word, frequency, tag
+            continue;
+        DatMemElem node_info;
+        assert(m_freq_sum > 0.0);
+        const int freq = atoi(buf[1].c_str()); // frequency is parsed as an integer here (LoadDefaultDict uses atof)
+        node_info.weight = log(1.0 * freq / m_freq_sum); // NOTE(review): a zero or unparsable frequency gives log(0); m_min_weight is not updated for user words - confirm intended
+        node_info.SetTag(buf[2]);
+        this->Update(buf[0].c_str(), buf[0].size(), elements_num); // trie value = index of this element in the element area
+        offset += (sizeof(DatMemElem));
+        elements_num++;
+        write_bytes += write(fd, &node_info, sizeof(DatMemElem));
+        if (Utf8CharNum(buf[0]) == 1) { // remember one-character user words for IsUserDictSingleChineseWord()
+            RuneArray word;
+            if (DecodeRunesInString(buf[0], word)) {
+                m_user_dict_single_chinese_word.insert(word[0]);
+            }
+        }
+    }
+}
+
+inline double DictTrie::GetMinWeight() const // Returns the min_weight field of the cache-file header (not the m_min_weight member).
+{
+    return this->GetCacheFileHeaderPtr()->min_weight;
+}
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef SegmentTrie_H
+#define SegmentTrie_H
+
+#include "storage-base.hpp"
+#include "cppjieba/Unicode.hpp"
+
+using namespace cppjieba;
+
+const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8"; // default dictionary read by DictTrie::PreLoad and DictTrie::LoadDefaultDict
+const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8"; // user dictionary read by DictTrie::LoadUserDict
+
+struct DictCacheFileHeader : CacheFileHeaderBase // Cache-file header extended with the minimum dictionary weight.
+{
+    double min_weight = 0; // value handed out by DictTrie::GetMinWeight()
+};
+
+class DictTrie : public StorageBase<DatMemElem, false, DictCacheFileHeader> // Segmentation dictionary: word -> DatMemElem lookup built on StorageBase.
+{
+public:
+    DictTrie(const vector<string> file_paths, string dat_cache_path = ""); // forwards both arguments to StorageBase, then calls Init()
+    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = ""); // passes {dict_path, user_dict_paths} to StorageBase, then calls Init()
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override; // writes the cache file at dat_cache_file from DICT_PATH and USER_DICT_PATH
+
+    const DatMemElem *Find(const string &key) const; // element stored for key, or nullptr when key is not found
+    void FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<struct DatDag>&res, size_t max_word_len = MAX_WORD_LENGTH) const; // fills res with one DatDag per rune of [begin, end)
+    void FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<WordRange>& words, size_t max_word_len = MAX_WORD_LENGTH) const; // appends the segmentation of [begin, end) to words
+    bool IsUserDictSingleChineseWord(const Rune& word) const; // true when word was recorded as a one-character user-dictionary word
+
+private:
+    DictTrie(); // declared private; no definition is visible in this chunk
+    void PreLoad(); // sums the dictionary frequencies into m_freq_sum
+    void LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num); // loads DICT_PATH into the trie and appends its elements to fd
+    void LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num); // loads USER_DICT_PATH into the trie and appends its elements to fd
+    double GetMinWeight() const; // min_weight read from the cache-file header
+
+    double m_freq_sum = 0.0; // total frequency accumulated by PreLoad()
+    double m_min_weight = 3.14e+100; // smallest weight seen by LoadDefaultDict(); starts at a large sentinel
+    unordered_set<Rune> m_user_dict_single_chinese_word; // one-character words collected by LoadUserDict()
+};
+
+#endif // SegmentTrie_H
--- a/libchinese-segmentation/development-files/header-files/ChineseSegmentation
+++ b/libchinese-segmentation/development-files/header-files/ChineseSegmentation
@ -0,0 +1 @@
+#include "chinese-segmentation.h"
--- a/libchinese-segmentation/development-files/header-files/HanZiToPinYin
+++ b/libchinese-segmentation/development-files/header-files/HanZiToPinYin
@ -0,0 +1 @@
+#include "hanzi-to-pinyin.h"
--- a/libchinese-segmentation/hanzi-to-pinyin-private.h
+++ b/libchinese-segmentation/hanzi-to-pinyin-private.h
@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef HANZITOPINYINPRIVATE_H
+#define HANZITOPINYINPRIVATE_H
+
+#include <QtCore/qglobal.h>
+#include <QHash>
+#include "pinyin4cpp_dictTrie.h"
+#include "hanzi-to-pinyin.h"
+#include "pinyin4cpp-trie.h"
+
+using namespace std;
+
+static const QHash<QString, QString> PhoneticSymbol = { // maps a tone-marked letter to its base letter followed by the tone digit (when there is one)
+    {"ā", "a1"}, {"á", "a2"}, {"ǎ", "a3"}, {"à", "a4"},
+    {"ē", "e1"}, {"é", "e2"}, {"ě", "e3"}, {"è", "e4"},
+    {"ō", "o1"}, {"ó", "o2"}, {"ǒ", "o3"}, {"ò", "o4"},
+    {"ī", "i1"}, {"í", "i2"}, {"ǐ", "i3"}, {"ì", "i4"},
+    {"ū", "u1"}, {"ú", "u2"}, {"ǔ", "u3"}, {"ù", "u4"},
+    // üe
+    {"ü", "v"}, // the only entry whose value has no tone digit (length 1)
+    {"ǖ", "v1"}, {"ǘ", "v2"}, {"ǚ", "v3"}, {"ǜ", "v4"},
+    {"ń", "n2"}, {"ň", "n3"}, {"ǹ", "n4"},
+    {"m̄", "m1"}, {"ḿ", "m2"}, {"m̀", "m4"}, // NOTE(review): some keys here look like base letter + combining mark (more than one QChar) — confirm lookups keyed by a single QChar can match them
+    {"ê̄", "ê1"}, {"ế", "ê2"}, {"ê̌", "ê3"}, {"ề", "ê4"} // values keep the non-ASCII base letter 'ê'
+};
+
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+class PINYINMANAGER_EXPORT HanZiToPinYinPrivate // private implementation class used by HanZiToPinYin
+{
+public:
+    HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr); // parent is optional
+    ~HanZiToPinYinPrivate();
+
+public:
+    template <typename T>
+    bool isMultiTone(T &&t) {return m_pinYinTrie.IsMultiTone(std::forward<T>(t));} // forwards t unchanged to Pinyin4cppTrie::IsMultiTone
+
+    bool contains(string &word);
+    int getResults(string &word, QStringList &results); // results is an output list
+    void setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType); // stores the four options in the members below
+
+private:
+    void convertDataStyle(QStringList &results); // rewrites results in place
+
+    HanZiToPinYin *q = nullptr; // back-pointer to the public object; not owned
+    //Pinyin4cppDictTrie *m_pinYinTrie = nullptr;
+    Pinyin4cppTrie m_pinYinTrie; // pinyin dictionary, held by value
+
+    SegType m_segType = SegType::Segmentation; // default: segmentation enabled
+    PolyphoneType m_polyphoneType = PolyphoneType::Disable; // default: polyphonic readings disabled
+    PinyinDataStyle m_pinyinDataStyle = PinyinDataStyle::Default;
+    ExDataProcessType m_exDataProcessType = ExDataProcessType::Default;
+};
+#endif // HANZITOPINYINPRIVATE_H
--- a/libchinese-segmentation/hanzi-to-pinyin.cpp
+++ b/libchinese-segmentation/hanzi-to-pinyin.cpp
@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include <mutex>
+#include <cctype>
+#include "hanzi-to-pinyin.h"
+#include "hanzi-to-pinyin-private.h"
+#include "chinese-segmentation.h"
+#include "cppjieba/Unicode.hpp"
+
+HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr; // singleton instance, created on first getInstance() call
+std::once_flag g_singleFlag; // guards the one-time creation of g_pinYinManager
+
+bool HanZiToPinYinPrivate::contains(string &word) // reports whether the pinyin trie has an entry for word
+{
+    return m_pinYinTrie.Contains(word); // pure delegation to Pinyin4cppTrie::Contains
+}
+
+/**
+ * Appends the pinyin of a single character to results.
+ *
+ * @param data          dictionary entry; alternative readings are separated by ','
+ * @param polyphoneType Enable keeps the whole entry, Disable keeps only the first reading
+ * @param results       list the pinyin is appended to
+ */
+static void appendCharPinyin(const string &data, PolyphoneType polyphoneType, QStringList &results)
+{
+    if (polyphoneType == PolyphoneType::Enable) {
+        results.append(QString::fromStdString(data));
+    } else if (polyphoneType == PolyphoneType::Disable) {
+        // find() returns npos when there is no ',', and substr(0, npos) is the whole entry.
+        results.append(QString::fromStdString(data.substr(0, data.find(','))));
+    }
+}
+
+/**
+ * Appends the pinyin of a multi-character word to results, one list entry per syllable.
+ *
+ * @param data          dictionary entry; syllables are separated by ',' and alternative
+ *                      readings of the whole word by '/'
+ * @param polyphoneType Enable emits every syllable as "reading1/reading2/...",
+ *                      Disable emits the syllables of the first reading only
+ * @param results       list the pinyin is appended to
+ */
+static void appendWordPinyin(const string &data, PolyphoneType polyphoneType, QStringList &results)
+{
+    const vector<string> readings = limonp::Split(data, "/");
+    if (readings.empty()) {
+        return;
+    }
+
+    if (readings.size() == 1) {
+        // Only one reading: emit its syllables as they are.
+        for (const string &syllable : limonp::Split(data, ",")) {
+            results.append(QString::fromStdString(syllable));
+        }
+        return;
+    }
+
+    if (polyphoneType == PolyphoneType::Enable) {
+        // Split every reading once instead of once per syllable.
+        vector<vector<string>> table;
+        table.reserve(readings.size());
+        for (const string &reading : readings) {
+            table.push_back(limonp::Split(reading, ","));
+        }
+
+        const size_t syllableCount = table.front().size();
+        for (size_t i = 0; i < syllableCount; ++i) {
+            QStringList alternatives;
+            for (const vector<string> &syllables : table) {
+                // A reading with fewer syllables than the first one is skipped
+                // instead of being indexed out of range.
+                if (i < syllables.size()) {
+                    alternatives.append(QString::fromStdString(syllables[i]));
+                }
+            }
+            results.append(alternatives.join('/'));
+        }
+    } else if (polyphoneType == PolyphoneType::Disable) {
+        for (const string &syllable : limonp::Split(readings.front(), ",")) {
+            results.append(QString::fromStdString(syllable));
+        }
+    }
+}
+
+/**
+ * Looks up the pinyin of word and stores it in results.
+ *
+ * The whole word is looked up first. If it is not in the dictionary and
+ * segmentation is enabled, the word is segmented and every segment is looked
+ * up; a multi-character segment that is still unknown is looked up character
+ * by character. Text without pinyin is kept or dropped according to
+ * m_exDataProcessType. The collected entries are finally converted to the
+ * configured data style.
+ *
+ * @param word    text to convert
+ * @param results output list; always cleared first
+ * @return -1 when word is not in the dictionary and segmentation is disabled, 0 otherwise
+ */
+int HanZiToPinYinPrivate::getResults(string &word, QStringList &results)
+{
+    results.clear();
+
+    // Fast path: the whole input is a dictionary entry.
+    const string directResult = m_pinYinTrie.Find(word);
+    if (!directResult.empty()) {
+        if (cppjieba::IsSingleWord(word)) {
+            appendCharPinyin(directResult, m_polyphoneType, results);
+        } else {
+            appendWordPinyin(directResult, m_polyphoneType, results);
+        }
+        convertDataStyle(results);
+        return 0;
+    }
+
+    // No direct hit and segmentation disabled: nothing more to try.
+    if (m_segType == SegType::NoSegmentation) {
+        return -1;
+    }
+
+    vector<string> segResults = ChineseSegmentation::getInstance()->callMixSegmentCutStr(word);
+    for (string &info : segResults) {
+        if (info.empty()) {
+            continue;
+        }
+
+        const string data = m_pinYinTrie.Find(info);
+        if (!data.empty()) {
+            // The segment itself is a dictionary entry.
+            if (cppjieba::IsSingleWord(info)) {
+                appendCharPinyin(data, m_polyphoneType, results);
+            } else {
+                appendWordPinyin(data, m_polyphoneType, results);
+            }
+            continue;
+        }
+
+        if (cppjieba::IsSingleWord(info)) {
+            // Unknown single character: keep the original text or drop it.
+            if (m_exDataProcessType == ExDataProcessType::Default) {
+                results.append(QString::fromStdString(info));
+            }
+            continue;
+        }
+
+        // Unknown multi-character segment: fall back to one lookup per character.
+        cppjieba::RuneStrArray runeArray;
+        cppjieba::DecodeRunesInString(info, runeArray);
+        for (auto i = runeArray.begin(); i != runeArray.end(); ++i) {
+            const string oneWord = cppjieba::GetStringFromRunes(info, i, i);
+            const string runeData = m_pinYinTrie.Find(oneWord);
+            if (runeData.empty()) {
+                if (m_exDataProcessType == ExDataProcessType::Default) {
+                    results.append(QString::fromStdString(oneWord));
+                }
+                // Always skip the pinyin step here: there is no pinyin to append.
+                // (Previously the Default case fell through and also appended an
+                // empty string after the original character.)
+                continue;
+            }
+            appendCharPinyin(runeData, m_polyphoneType, results);
+        }
+    }
+
+    convertDataStyle(results);
+    return 0;//todo
+}
+
+void HanZiToPinYinPrivate::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType) // replaces all four conversion options at once
+{
+    m_pinyinDataStyle = dataStyle; // read by convertDataStyle()
+    m_segType = segType; // read by getResults() when the whole word is not in the dictionary
+    m_polyphoneType = polyphoneType; // read by getResults() to keep all readings or only the first
+    m_exDataProcessType = processType; // read by getResults() for text that has no pinyin
+}
+
+// Returns true only for ASCII letters. The value handed to isalpha() is always
+// in the unsigned char range, which avoids the undefined behaviour of calling
+// isalpha() with a negative char (e.g. the Latin-1 code of 'á').
+static bool isAsciiLetter(const QChar &c)
+{
+    const ushort code = c.unicode();
+    return code < 0x80 && isalpha(static_cast<unsigned char>(code)) != 0;
+}
+
+// Replaces every tone-marked letter of text by its base letter, in place.
+// The length of text never changes.
+static void stripToneMarks(QString &text)
+{
+    for (int i = 0; i < text.size(); ++i) {
+        const QChar c = text.at(i);
+        if (isAsciiLetter(c)) {
+            continue;
+        }
+        const QString value = PhoneticSymbol.value(c);
+        if (!value.isEmpty()) {
+            text[i] = value.at(0);
+        }
+    }
+}
+
+/**
+ * Converts every entry of results from the dictionary form (tone-marked
+ * pinyin) to the style selected by m_pinyinDataStyle.
+ *
+ *  - Default:     tone marks removed, duplicate ','-separated readings dropped (order kept)
+ *  - Tone:        left untouched
+ *  - Tone2:       each tone-marked letter replaced by base letter + tone digit ("zho1ng")
+ *  - Tone3:       tone marks removed, tone digit appended to the end of its syllable ("zhong1")
+ *  - FirstLetter: first letter of every reading
+ *  - English:     not supported, left untouched
+ *
+ * @param results entries to convert, modified in place
+ */
+void HanZiToPinYinPrivate::convertDataStyle(QStringList &results)
+{
+    switch (m_pinyinDataStyle) {
+    case PinyinDataStyle::Default:
+        for (QString &info : results) {
+            if (info == ",") {
+                continue;
+            }
+            stripToneMarks(info);
+
+            // De-duplicate the readings while keeping their original order.
+            const QStringList readings = info.split(',', QString::SkipEmptyParts);
+            QStringList unique;
+            for (const QString &reading : readings) {
+                if (!unique.contains(reading)) {
+                    unique.push_back(reading);
+                }
+            }
+            info = unique.join(",");
+        }
+        break;
+
+    case PinyinDataStyle::Tone:
+        // Dictionary data already has this form.
+        break;
+
+    case PinyinDataStyle::Tone2:
+        for (QString &info : results) {
+            QString converted;
+            for (int i = 0; i < info.size(); ++i) {
+                const QChar c = info.at(i);
+                const QString value = isAsciiLetter(c) ? QString() : PhoneticSymbol.value(c);
+                if (value.isEmpty()) {
+                    converted.append(c);
+                } else {
+                    converted.append(value);
+                }
+            }
+            info = converted;
+        }
+        break;
+
+    case PinyinDataStyle::Tone3:
+        for (QString &info : results) {
+            if (info == "/") {
+                continue;
+            }
+
+            // Each ','- or '/'-separated syllable is handled on its own: the tone
+            // digit found inside it is held back and emitted when the syllable
+            // ends. Separators are copied unchanged, so entries with any number
+            // of readings work and '/' is never left converted to ','.
+            QString converted;
+            QChar pendingTone; // null while the current syllable has no tone digit
+            for (int i = 0; i < info.size(); ++i) {
+                const QChar c = info.at(i);
+                if (c == QLatin1Char(',') || c == QLatin1Char('/')) {
+                    if (!pendingTone.isNull()) {
+                        converted.append(pendingTone);
+                        pendingTone = QChar();
+                    }
+                    converted.append(c);
+                    continue;
+                }
+
+                const QString value = isAsciiLetter(c) ? QString() : PhoneticSymbol.value(c);
+                if (value.isEmpty()) {
+                    converted.append(c);
+                    continue;
+                }
+                converted.append(value.at(0));
+                // "ü" maps to the single character "v": there is no tone digit to
+                // read, so value.at(1) must not be touched for it.
+                if (value.size() > 1 && pendingTone.isNull()) {
+                    pendingTone = value.at(1);
+                }
+            }
+            if (!pendingTone.isNull()) {
+                converted.append(pendingTone);
+            }
+            info = converted;
+        }
+        break;
+
+    case PinyinDataStyle::FirstLetter:
+        for (QString &info : results) {
+            if (info == "," or info == "/") {
+                continue;
+            }
+
+            // Polyphonic words use '/' between readings; they are processed as a
+            // ',' list and joined with '/' again at the end.
+            const bool isPolyphoneWords = info.contains("/");
+            if (isPolyphoneWords) {
+                info.replace("/", ",");
+            }
+            stripToneMarks(info);
+
+            const QStringList readings = info.split(',', QString::SkipEmptyParts);
+            QStringList initials;
+            for (const QString &reading : readings) {
+                // NOTE(review): the duplicate check compares the full reading
+                // against a list of first letters, so it only filters readings
+                // that are a single letter. Kept as is because the intended
+                // result for polyphonic words is unclear — confirm before changing.
+                if (!initials.contains(reading)) {
+                    initials.push_back(QString(reading.at(0)));
+                }
+            }
+            const QString separator = isPolyphoneWords ? "/" : ",";
+            info = initials.join(separator);
+        }
+        break;
+
+    case PinyinDataStyle::English:
+        // Not supported yet.
+        break;
+    }
+}
+
+HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent) // only stores the back-pointer; m_pinYinTrie is default-constructed as a member
+{
+    //const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt";
+    //const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt";
+    //m_pinYinTrie = new Pinyin4cppDictTrie(SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH);
+    //m_pinYinTrie = new Pinyin4cppTrie;
+}
+
+HanZiToPinYinPrivate::~HanZiToPinYinPrivate() // nothing to release: the trie is a value member; the code below is commented out
+{
+//    if (m_pinYinTrie){
+//        delete m_pinYinTrie;
+//        m_pinYinTrie = nullptr;
+//    }
+}
+
+HanZiToPinYin * HanZiToPinYin::getInstance() // returns the process-wide instance, creating it on the first call
+{
+    call_once(g_singleFlag, []() { // std::call_once runs the lambda at most once, even with concurrent callers
+        g_pinYinManager = new HanZiToPinYin; // NOTE(review): no matching delete is visible in this file
+    });
+    return g_pinYinManager;
+}
+
+bool HanZiToPinYin::contains(string &word) // public entry point; forwards to the private implementation
+{
+    return d->contains(word);
+}
+
+bool HanZiToPinYin::isMultiTone(string &word) // overload for non-const lvalues
+{
+    return d->isMultiTone(word);
+}
+
+bool HanZiToPinYin::isMultiTone(string &&word) // overload for rvalues
+{
+    return d->isMultiTone(word); // word is a named variable here, so it is passed on as an lvalue
+}
+
+bool HanZiToPinYin::isMultiTone(const string &word) // overload for const lvalues
+{
+    return d->isMultiTone(word);
+}
+
+bool HanZiToPinYin::isMultiTone(const string &&word) // overload for const rvalues
+{
+    return d->isMultiTone(word); // passed on as a const lvalue
+}
+
+int HanZiToPinYin::getResults(string word, QStringList &results) // word is taken by value because the private implementation needs a non-const reference
+{
+    return d->getResults(word, results);
+}
+
+void HanZiToPinYin::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType) // forwards all four options to the private implementation
+{
+    d->setConfig(dataStyle, segType, polyphoneType, processType);
+}
+
+HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate) // allocates the private implementation; NOTE(review): no matching delete is visible in this file
+{
+}
--- a/libchinese-segmentation/hanzi-to-pinyin.h
+++ b/libchinese-segmentation/hanzi-to-pinyin.h
@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef HANZITOPINYIN_H
+#define HANZITOPINYIN_H
+
+#include <QtCore/qglobal.h>
+#include <QStringList>
+#include "pinyin4cpp-common.h"
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+using namespace std;
+
+class HanZiToPinYinPrivate;
+class PINYINMANAGER_EXPORT HanZiToPinYin
+{
+public:
+    static HanZiToPinYin * getInstance(); // access point of the singleton
+
+public:
+    /**
+     * @brief HanZiToPinYin::isMultiTone Checks whether a character/word/sentence is polyphonic (has several readings)
+     * @param word the character/word/sentence to check
+     * @return bool false if it is not
+     */
+    bool isMultiTone(string &word);
+    bool isMultiTone(string &&word);
+    bool isMultiTone(const string &word);
+    bool isMultiTone(const string &&word);
+
+    /**
+     * @brief HanZiToPinYin::contains Queries whether a character/word/sentence has pinyin (i.e. whether the database contains it)
+     * @param word the character/word/sentence to query
+     * @return bool false if the database does not contain it
+     */
+    bool contains(string &word);
+
+    /**
+     * @brief HanZiToPinYin::getResults Gets the pinyin of a character/word/sentence
+     * @param word the character/word/sentence whose pinyin is requested
+     * @param results pinyin list of word (may include polyphonic readings); results is cleared on every call
+     * @return int 0 if pinyin was obtained, otherwise -1
+     */
+    int getResults(string word, QStringList &results);
+
+    /**
+     * @brief setConfig Configures the features of HanZiToPinYin; see pinyin4cpp-common.h for details
+     * @param dataStyle style of the returned data; Default by default
+     * @param segType whether segmentation is enabled; enabled by default
+     * @param polyphoneType whether polyphonic readings are enabled; disabled by default
+     * @param processType how data without pinyin is handled; Default by default
+     */
+    void setConfig(PinyinDataStyle dataStyle,SegType segType,PolyphoneType polyphoneType,ExDataProcessType processType);
+
+protected:
+    HanZiToPinYin(); // construction only through getInstance() or a subclass
+    ~HanZiToPinYin();
+    HanZiToPinYin(const HanZiToPinYin&) = delete; // not copyable
+    HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
+private:
+    static HanZiToPinYin *g_pinYinManager; // the singleton instance
+    HanZiToPinYinPrivate *d = nullptr; // private implementation object
+};
+
+#endif // HANZITOPINYIN_H
--- a/libchinese-segmentation/libchinese-segmentation.pro
+++ b/libchinese-segmentation/libchinese-segmentation.pro
@ -1,39 +1,50 @@
 QT -= gui

-VERSION = 0.0.1
+VERSION = 1.1.0
 TARGET =  chinese-segmentation
 TEMPLATE = lib
 DEFINES += LIBCHINESESEGMENTATION_LIBRARY
+DEFINES += VERSION='\\"$${VERSION}\\"'

-CONFIG += c++11
+CONFIG += c++11 create_pc create_prl no_install_prl

 # The following define makes your compiler emit warnings if you use
 # any Qt feature that has been marked deprecated (the exact warnings
 # depend on your compiler). Please consult the documentation of the
 # deprecated API in order to know how to port your code away from it.
 DEFINES += QT_DEPRECATED_WARNINGS
+QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
+#QMAKE_CXXFLAGS += -Werror=uninitialized
+QMAKE_CXXFLAGS += -execution-charset:utf-8

 # You can also make your code fail to compile if it uses deprecated APIs.
 # In order to do so, uncomment the following line.
 # You can also select to disable deprecated APIs only up to a certain version of Qt.
 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
 include(cppjieba/cppjieba.pri)
+include(pinyin4cpp/pinyin4cpp.pri)
+include(storage-base/storage-base-cedar.pri)

 #LIBS += -L/usr/local/lib/libjemalloc -ljemalloc

 SOURCES += \
    chinese-segmentation.cpp \
-    pinyinmanager.cpp
+    hanzi-to-pinyin.cpp

 HEADERS += \
+    chinese-segmentation-private.h \
    chinese-segmentation.h \
-    libchinese-segmentation_global.h \
-    pinyinmanager.h
+    common-struct.h \
+    hanzi-to-pinyin-private.h \
+    hanzi-to-pinyin.h \
+    pinyin4cpp-common.h \
+    libchinese-segmentation_global.h

 dict_files.path = /usr/share/ukui-search/res/dict/
 dict_files.files = $$PWD/dict/*.utf8\
 dict_files.files += $$PWD/dict/pos_dict/*.utf8\
 dict_files.files += $$PWD/dict/*.txt\
+dict_files.files += $$PWD/pinyin4cpp/dict/*.txt

 INSTALLS += \
    dict_files \
@ -41,28 +52,28 @@ INSTALLS += \
 # Default rules for deployment.
 unix {
    target.path = $$[QT_INSTALL_LIBS]
-}
+    QMAKE_PKGCONFIG_NAME = chinese-segmentation
+    QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
+    QMAKE_PKGCONFIG_VERSION = $$VERSION
+    QMAKE_PKGCONFIG_LIBDIR = $$target.path
+    QMAKE_PKGCONFIG_DESTDIR = pkgconfig
+    QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
+    QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
+
 !isEmpty(target.path): INSTALLS += target

-    header.path = /usr/include/chinese-seg/
-    header.files += *.h
-    headercppjieba.path = /usr/include/chinese-seg/cppjieba/
-    headercppjieba.files = cppjieba/*
-    INSTALLS += header headercppjieba
+    header.path = /usr/include/chinese-seg
+    header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h pinyin4cpp-common.h
+    header.files += development-files/header-files/*
+#    headercppjieba.path = /usr/include/chinese-seg/cppjieba/
+#    headercppjieba.files = cppjieba/*
+    INSTALLS += header
+}
+

 #DISTFILES += \
 #    jiaba/jieba.pri

 DISTFILES += \
-    dict/README.md \
-    dict/hmm_model.utf8 \
-    dict/idf.utf8 \
-    dict/jieba.dict.utf8 \
-    dict/pos_dict/char_state_tab.utf8 \
-    dict/pos_dict/prob_emit.utf8 \
-    dict/pos_dict/prob_start.utf8 \
-    dict/pos_dict/prob_trans.utf8 \
-    dict/stop_words.utf8 \
-    dict/user.dict.utf8 \
-    dict/pinyinWithoutTone.txt
-
+    development-files/header-files/* \
+    pinyin4cpp/pinyin4cpp.pri
--- a/libchinese-segmentation/pinyin4cpp-common.h
+++ b/libchinese-segmentation/pinyin4cpp-common.h
@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef PINYIN4CPP_COMMON_H
+#define PINYIN4CPP_COMMON_H
+
+/**
+ * @brief The PinyinDataStyle enum
+ * Default: default mode, “中心” returns “zhong xin”
+ * Tone: tone-mark mode, “中心” returns “zhōng xīn”
+ * Tone2: tone-digit mode 2, “中心” returns “zho1ng xi1n”
+ * Tone3: tone-digit mode 3, “中心” returns “zhong1 xin1”
+ * FirstLetter: first-letter mode, “中心” returns “z x”
+ * English: English translation mode (not supported yet), “中心” returns “center,heart,core”
+ */
+enum class PinyinDataStyle {
+    Default       = 1u << 0,
+    Tone          = 1u << 1,
+    Tone2         = 1u << 2,
+    Tone3         = 1u << 3,
+    FirstLetter   = 1u << 4,
+    English       = 1u << 5
+};
+
+/**
+ * @brief The SegType enum
+ * Segmentation: segmentation enabled (the default), “银河麒麟” -> “银河” “麒麟”
+ * NoSegmentation: no-segmentation mode, “银河麒麟”
+ */
+enum class SegType {
+    Segmentation    = 1u << 0,
+    NoSegmentation  = 1u << 1
+};
+
+/**
+ * @brief The PolyphoneType enum
+ * Disable: polyphonic readings disabled (the default); “奇安信” returns “qi an xin”, a polyphonic character returns its common reading
+ * Enable: polyphonic readings enabled; “奇安信” returns “qi,ji an xin”
+ * Note: polyphonic words are returned in the form “朝阳” returns "zhao/chao yang/yang"
+ */
+enum class PolyphoneType {
+    Disable       = 1u << 0,
+    Enable        = 1u << 1
+};
+
+/**
+ * @brief The ExDataProcessType enum
+ * Default: data without pinyin is returned as is (the default); “123木头人” returns "123 mu tou ren" (segmentation mode)
+ * Delete: data without pinyin is removed; “123木头人” returns "mu tou ren" (segmentation mode)
+ */
+enum class ExDataProcessType {
+    Default       = 1u << 0,
+    Delete        = 1u << 1
+};
+
+#endif //PINYIN4CPP_COMMON_H
--- a/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include "pinyin4cpp-trie.h"
+
+Pinyin4cppTrie::Pinyin4cppTrie(string dat_cache_path) // uses the two default dictionary paths as source files
+    : StorageBase<char, false, CacheFileHeaderBase>(vector<string>{SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH}, dat_cache_path)
+{
+    this->Init(); // Init() is not defined in this class; presumably provided by StorageBase — TODO confirm
+}
+
+Pinyin4cppTrie::Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path) // NOTE(review): LoadSingleWordDict/LoadWordsDict read the fixed default paths, not file_paths — confirm this is intended
+    : StorageBase<char, false, CacheFileHeaderBase>(file_paths, dat_cache_path)
+{
+    this->Init(); // same initialisation step as the other constructor
+}
+
+
+
+// Reports whether the trie has an entry for word, i.e. whether Find() yields a non-empty string.
+bool Pinyin4cppTrie::Contains(string &word) {
+    const string pinyin = this->Find(word);
+    return !pinyin.empty();
+}
+
+/**
+ * Reports whether word has more than one reading in the dictionary.
+ *
+ * Entry format (as consumed by the callers of Find()): for a single character
+ * the alternative readings are separated by ','; for a multi-character word
+ * ',' separates the syllables and '/' separates alternative readings of the
+ * whole word.
+ *
+ * The previous implementation returned true when NO ',' was found, which is
+ * the inverse of the intended answer and also reported unknown words as
+ * polyphonic.
+ *
+ * @param word UTF-8 encoded character or word
+ * @return true if the dictionary lists several readings for word; false if it
+ *         lists one reading or does not contain word at all
+ */
+bool Pinyin4cppTrie::IsMultiTone(const string &word) {
+    const string result = this->Find(word);
+    if (result.empty()) {
+        return false; // not in the dictionary
+    }
+    if (result.find('/') != string::npos) {
+        return true; // several readings of a whole word
+    }
+
+    // Count UTF-8 code points: every byte that is not a continuation byte
+    // (10xxxxxx) starts a new one.
+    size_t codePoints = 0;
+    for (const unsigned char byte : word) {
+        if ((byte & 0xC0) != 0x80) {
+            ++codePoints;
+        }
+    }
+
+    // For a single character ',' separates readings; for a longer word it only
+    // separates syllables and says nothing about polyphony.
+    return codePoints <= 1 && result.find(',') != string::npos;
+}
+
+void Pinyin4cppTrie::LoadSourceFile(const string &dat_cache_file, const string &md5) // builds the cache file dat_cache_file from the two pinyin dictionaries
+{
+    CacheFileHeaderBase header;
+    assert(sizeof(header.md5_hex) == md5.size()); // md5 must fill the header field exactly; compiled out under NDEBUG
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX"; // mkstemp template; data goes to a temporary file that is renamed at the end
+    umask(S_IWGRP | S_IWOTH); // NOTE(review): changes the process-wide umask and does not restore it
+    const int fd =mkstemp((char *)tmp_filepath.data()); // mkstemp rewrites the XXXXXX suffix in place
+    assert(fd >= 0); // NOTE(review): under NDEBUG a failed mkstemp goes undetected
+    fchmod(fd, 0644);
+
+    write_bytes = write(fd, (const char *)&header, sizeof(CacheFileHeaderBase)); // header first; three int fields are overwritten further down
+
+    this->LoadSingleWordDict(fd, write_bytes, offset, elements_num); // appends the pinyin strings and updates the three counters
+    this->LoadWordsDict(fd, write_bytes, offset, elements_num);
+
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize()); // NOTE(review): write() results are summed unchecked; a -1 would corrupt the count
+
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET); // reposition right after the md5 field of the header
+    write(fd, &elements_num, sizeof(int)); // the three ints below are written back to back from that position
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(CacheFileHeaderBase) + offset + this->GetDataTrieTotalSize()); // sanity check on the total bytes written (debug builds only)
+
+    const auto rename_ret = rename(tmp_filepath.c_str(), dat_cache_file.c_str()); // move the finished temporary file to its final name
+    assert(0 == rename_ret); // NOTE(review): a failed rename is ignored under NDEBUG
+}
+
+// Exact-match lookup of key. Returns the NUL-terminated string stored at the
+// matched element index, or an empty string when the search result is negative.
+string Pinyin4cppTrie::Find(const string &key)
+{
+    const int index = this->ExactMatchSearch(key.c_str(), key.size());
+    return (index < 0) ? string() : string(&this->GetElementPtr()[index]);
+}
+
+// Reads SINGLE_WORD_PINYIN_PATH line by line. Empty lines, lines starting with
+// '#' and lines that do not split into exactly three ':'-separated fields are
+// ignored. For every other line the third field is registered as trie key at
+// the current offset and the second field is written to fd including its
+// terminating NUL; offset, elements_num and write_bytes are advanced accordingly.
+void Pinyin4cppTrie::LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dictStream(SINGLE_WORD_PINYIN_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dictStream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, ":");
+        if (fields.size() == 3) {
+            const string &key = fields[2];
+            const string &pinyin = fields[1];
+            this->Update(key.c_str(), key.size(), offset);
+            offset += (pinyin.size() + 1);
+            elements_num++;
+            write_bytes += write(fd, pinyin.c_str(), pinyin.size() + 1);
+        }
+    }
+}
+
+// Reads WORDS_PINYIN_PATH line by line. Empty lines, lines starting with '#'
+// and lines that do not split into exactly two ':'-separated fields are
+// ignored. For every other line the first field is registered as trie key at
+// the current offset and the second field is written to fd including its
+// terminating NUL; offset, elements_num and write_bytes are advanced accordingly.
+void Pinyin4cppTrie::LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dictStream(WORDS_PINYIN_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dictStream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, ":");
+        if (fields.size() == 2) {
+            const string &key = fields[0];
+            const string &pinyin = fields[1];
+            this->Update(key.c_str(), key.size(), offset);
+            offset += (pinyin.size() + 1);
+            elements_num++;
+            write_bytes += write(fd, pinyin.c_str(), pinyin.size() + 1);
+        }
+    }
+}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef PINYIN4CPPTRIE_H
+#define PINYIN4CPPTRIE_H
+
+#include "storage-base.hpp"
+
+const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt"; // absolute path of the single-character pinyin dictionary
+const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt"; // absolute path of the word pinyin dictionary
+
+class Pinyin4cppTrie : public StorageBase<char, false, CacheFileHeaderBase> // StorageBase specialisation whose elements are chars (pinyin strings)
+{
+public:
+    Pinyin4cppTrie(string dat_cache_path = ""); // dat_cache_path is optional
+    Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path = ""); // NOTE(review): file_paths is taken by const value (copied) — confirm a const reference was not intended
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override; // overrides the virtual declared in the base class
+    string Find(const string &key); // returns the pinyin entry for key
+    bool Contains(string &word);
+    bool IsMultiTone(const string &word);
+
+private:
+    void LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num); // fd is an input; the three int references are updated by the call
+    void LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num); // same parameter convention as LoadSingleWordDict
+};
+
+#endif // PINYIN4CPPTRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
@ -0,0 +1,15 @@
+# Make the headers in this directory reachable via plain #include "...".
+INCLUDEPATH += $$PWD
+
+# Public and internal headers of the pinyin4cpp component.
+HEADERS += \
+    $$PWD/pinyin4cpp-trie.h \
+    $$PWD/pinyin4cpp_dataTrie.h \
+    $$PWD/pinyin4cpp_dictTrie.h
+
+# Matching implementation files.
+SOURCES += \
+    $$PWD/pinyin4cpp-trie.cpp \
+    $$PWD/pinyin4cpp_dataTrie.cpp \
+    $$PWD/pinyin4cpp_dictTrie.cpp
+
+# Dictionary data shipped with the sources.
+# NOTE(review): unlike the entries above, these paths are not prefixed with
+# $$PWD - confirm they resolve as intended from the including project.
+DISTFILES += \
+    pinyin4cpp/dict/wordsPinyin.txt \
+    pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include "pinyin4cpp_dataTrie.h"
+
+// Intentionally empty: no work is done at construction time. The object only
+// becomes usable after InitAttachDat() / InitBuildDat() succeeds.
+Pinyin4cppDataTrie::Pinyin4cppDataTrie()
+{
+
+}
+
+// Releases the cache-file mapping and descriptor acquired by InitAttachDat().
+// Each resource is released only if it was actually acquired, so destroying
+// an object that never attached (m_mmapAddr == nullptr, m_mmapFd == -1) or
+// whose mmap() failed (m_mmapAddr == MAP_FAILED) no longer hands invalid
+// arguments to munmap()/close().
+Pinyin4cppDataTrie::~Pinyin4cppDataTrie()
+{
+    if (m_mmapAddr != nullptr && m_mmapAddr != reinterpret_cast<char *>(MAP_FAILED)) {
+        munmap(m_mmapAddr, m_mmapLength);
+    }
+    m_mmapAddr = nullptr;
+
+    if (m_mmapFd >= 0) {
+        close(m_mmapFd);
+    }
+    m_mmapFd = -1;
+}
+
+// Returns the value stored for `key`, or an empty string when the key is not
+// present or the stored offset lies outside the value region.
+string Pinyin4cppDataTrie::Find(const string &key) const {
+    // Lookup through the darts-clone interface.
+    Darts::DoubleArray::result_pair_type match;
+    m_DoubleArrayDataTrie.exactMatchSearch(key.c_str(), match);
+
+    // A hit needs a non-zero matched length and an offset inside [0, m_elementsSize).
+    const bool hit = (0 != match.length)
+                     && (match.value >= 0)
+                     && ((size_t)match.value < m_elementsSize); // todo
+    if (!hit) {
+        return string();
+    }
+    // The value is the NUL-terminated string starting at that offset.
+    return string(m_elementsPtr + match.value);
+
+    // Equivalent lookup through the cedarpp interface (kept for reference):
+    //    int result = m_DoubleArrayDataTrie.exactMatchSearch<int>(key.c_str(), key.size());
+    //    if (result < 0)
+    //        return string();
+    //    return string(&m_elementsPtr[result]);
+}
+
+// Writes a fresh cache file for `elements`, then attaches to it.
+// The return value is the outcome of the attach step.
+bool Pinyin4cppDataTrie::InitBuildDat(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
+    BuildDatCache(elements, dat_cache_file, md5);
+    const bool attached = InitAttachDat(dat_cache_file, md5);
+    return attached;
+}
+
+// Maps `dat_cache_file` read-only and points the trie at its contents.
+//
+// Expected file layout: CacheFileHeader | value strings (header.elements_size
+// bytes) | double-array units (header.dat_size units).
+//
+// Returns true on success. Returns false - with the descriptor and mapping
+// released again - when the file cannot be opened or mapped, is shorter than
+// the header, carries a different md5 than `md5`, or has a length that does
+// not match the sizes recorded in its header. A stale or damaged cache is
+// therefore reported to the caller (which rebuilds it) instead of aborting,
+// and a failed attempt no longer leaks the descriptor and mapping.
+bool Pinyin4cppDataTrie::InitAttachDat(const string &dat_cache_file, const string &md5) {
+    // Undo whatever this object currently holds from an attach attempt.
+    auto release = [this]() {
+        if (m_mmapAddr != nullptr && m_mmapAddr != reinterpret_cast<char *>(MAP_FAILED)) {
+            munmap(m_mmapAddr, m_mmapLength);
+        }
+        m_mmapAddr = nullptr;
+        m_mmapLength = 0;
+        if (m_mmapFd >= 0) {
+            close(m_mmapFd);
+        }
+        m_mmapFd = -1;
+        m_elementsPtr = nullptr;
+        m_elementsNum = 0;
+        m_elementsSize = 0;
+    };
+
+    if (m_mmapAddr != nullptr) {
+        // Re-attaching: the trie still points into the old mapping, detach it
+        // before that mapping goes away.
+        m_DoubleArrayDataTrie.clear();
+    }
+    release();
+
+    m_mmapFd = open(dat_cache_file.c_str(), O_RDONLY);
+    if (m_mmapFd < 0) {
+        return false;
+    }
+
+    // File length; it must at least hold the header (also rules out length 0,
+    // which mmap() rejects).
+    const auto seek_off = lseek(m_mmapFd, 0, SEEK_END);
+    if (seek_off < 0 || static_cast<size_t>(seek_off) < sizeof(CacheFileHeader)) {
+        release();
+        return false;
+    }
+    m_mmapLength = static_cast<size_t>(seek_off);
+
+    void *addr = mmap(NULL, m_mmapLength, PROT_READ, MAP_SHARED, m_mmapFd, 0);
+    if (MAP_FAILED == addr) {
+        release();
+        return false;
+    }
+    m_mmapAddr = reinterpret_cast<char *>(addr);
+
+    const CacheFileHeader &header = *reinterpret_cast<const CacheFileHeader *>(m_mmapAddr);
+
+    // The cache is only valid for the exact dictionary set it was built from.
+    if (sizeof(header.md5_hex) != md5.size()
+            || 0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
+        release();
+        return false;
+    }
+
+    // The three regions must account for every byte of the file.
+    const size_t expected_length = sizeof(CacheFileHeader) + header.elements_size
+                                   + header.dat_size * m_DoubleArrayDataTrie.unit_size();
+    if (m_mmapLength != expected_length) {
+        release();
+        return false;
+    }
+
+    m_elementsNum = header.elements_num;
+    m_elementsSize = header.elements_size;
+    m_elementsPtr = m_mmapAddr + sizeof(CacheFileHeader);
+    const char *dat_ptr = m_elementsPtr + header.elements_size;
+    // The trie reads the units straight from the mapping; it does not own them.
+    m_DoubleArrayDataTrie.set_array(const_cast<char *>(dat_ptr), header.dat_size);
+    return true;
+}
+
+// Serializes `elements` into the on-disk cache consumed by InitAttachDat().
+//
+// File layout: CacheFileHeader | every value as a NUL-terminated string, in
+// map order | the raw double-array units. Each key is stored in the trie with
+// the byte offset of its value inside the value region.
+//
+// The data goes to a mkstemp() sibling of `dat_cache_file` and is renamed
+// over the target only after every byte was written. On any I/O failure the
+// temporary file is removed and the target is left untouched, so the
+// caller's following InitAttachDat() reports the problem. The process umask
+// is restored after the temporary file has been created.
+void Pinyin4cppDataTrie::BuildDatCache(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
+    vector<const char*> keys_ptr_vec;
+    vector<int> values_vec;
+    keys_ptr_vec.reserve(elements.size());
+    values_vec.reserve(elements.size());
+
+    CacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    if (sizeof(header.md5_hex) != md5.size()) {
+        return; // release builds: never copy past the end of md5_hex
+    }
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    // std::map iterates in key order; key i maps to the running byte offset
+    // of value i.
+    int offset(0);
+    for (auto &info : elements) {
+        assert(info.second.size() > 0);
+        keys_ptr_vec.push_back(info.first.c_str());
+        values_vec.push_back(offset);
+        offset += (info.second.size() + 1); // +1 for the terminating '\0'
+    }
+
+    auto const ret = m_DoubleArrayDataTrie.build(keys_ptr_vec.size(), keys_ptr_vec.data(), NULL, values_vec.data());
+    assert(0 == ret);
+    if (0 != ret) {
+        return;
+    }
+    header.elements_num = elements.size();
+    header.elements_size = offset;
+    header.dat_size = m_DoubleArrayDataTrie.size();
+
+    // mkstemp() rewrites the XXXXXX suffix in place, so it needs a writable buffer.
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    const mode_t previous_umask = umask(S_IWGRP | S_IWOTH);
+    const int fd = mkstemp(&tmp_filepath[0]);
+    umask(previous_umask);
+    if (fd < 0) {
+        return;
+    }
+    fchmod(fd, 0644);
+
+    // write() may transfer fewer bytes than requested; loop until done.
+    auto write_all = [fd](const void *data, size_t length) -> bool {
+        const char *cursor = static_cast<const char *>(data);
+        while (length > 0) {
+            const ssize_t written = write(fd, cursor, length);
+            if (written <= 0) {
+                return false;
+            }
+            cursor += written;
+            length -= static_cast<size_t>(written);
+        }
+        return true;
+    };
+
+    bool ok = write_all(&header, sizeof(header));
+    for (auto it = elements.begin(); ok && it != elements.end(); ++it) {
+        ok = write_all(it->second.c_str(), it->second.size() + 1);
+    }
+    ok = ok && write_all(m_DoubleArrayDataTrie.array(), m_DoubleArrayDataTrie.total_size());
+    ok = (0 == close(fd)) && ok;
+
+    // Publish the file only when it is complete.
+    if (!ok || 0 != rename(tmp_filepath.c_str(), dat_cache_file.c_str())) {
+        unlink(tmp_filepath.c_str());
+    }
+}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef PINYIN4cpp_DATATRIE_H
+#define PINYIN4cpp_DATATRIE_H
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <QDebug>
+#include "Md5.hpp"
+#include "LocalVector.hpp"
+#include "StringUtil.hpp"
+//#define USE_REDUCED_TRIE
+#include "../storage-base/cedar/cedar.h"
+#include "../storage-base/darts-clone/darts.h"
+
+using namespace std;
+using std::pair;
+
+// Fixed-size header at the start of a cache file. It is written and read back
+// as raw bytes, so its layout is part of the file format.
+struct CacheFileHeader { // TODO: byte alignment
+    char md5_hex[32] = {};       // md5 of the source dictionaries, compared on attach (no terminator stored)
+    uint32_t elements_num = 0;   // number of stored values
+    uint32_t elements_size = 0;  // size in bytes of the value region that follows the header
+    uint32_t dat_size = 0;       // number of double-array units that follow the value region
+};
+
+// Read-only key -> string store backed by a memory-mapped cache file: a
+// double-array trie maps each key to the offset of its value string.
+class Pinyin4cppDataTrie {
+public:
+    Pinyin4cppDataTrie();
+    // Unmaps the cache file and closes its descriptor.
+    ~Pinyin4cppDataTrie();
+
+    // Value stored for `key`, or an empty string when there is none.
+    string Find(const string & key) const;
+
+    // Builds the cache file from `elements`, then attaches to it.
+    bool InitBuildDat(map<string, string>& elements, const string & dat_cache_file, const string & md5);
+
+    // Attaches to an existing cache file; false if it cannot be opened or its md5 differs.
+    bool InitAttachDat(const string & dat_cache_file, const string & md5);
+
+private:
+    // Writes header, values and trie units to `dat_cache_file`.
+    void BuildDatCache(map<string, string>& elements, const string & dat_cache_file, const string & md5);
+
+    // Declared but not defined: the object owns a descriptor and a mapping,
+    // so copying is disabled.
+    Pinyin4cppDataTrie(const Pinyin4cppDataTrie &);
+    Pinyin4cppDataTrie &operator=(const Pinyin4cppDataTrie &);
+
+private:
+    Darts::DoubleArray m_DoubleArrayDataTrie;
+    // Alternative cedar-based trie, currently disabled:
+    //cedar::da<int, -1, -2, true> m_DoubleArrayDataTrie;
+    const char * m_elementsPtr = nullptr;  // start of the value region inside the mapping
+    size_t m_elementsNum = 0;              // header.elements_num
+    size_t m_elementsSize = 0;             // header.elements_size, used to bounds-check offsets
+    size_t m_mmapLength = 0;               // length of the mapping (the file size)
+
+    int    m_mmapFd = -1;                  // descriptor of the mapped cache file, -1 when none
+    char * m_mmapAddr = nullptr;           // base address of the mapping
+};
+
+#endif //PINYIN4cpp_DATATRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include "pinyin4cpp_dictTrie.h"
+#include "malloc.h"
+
+// All construction work is delegated to Init().
+Pinyin4cppDictTrie::Pinyin4cppDictTrie(const string &single_word_dict_path, const string &words_dict_paths, const string &dat_cache_path) {
+    Init(single_word_dict_path, words_dict_paths, dat_cache_path);
+}
+
+// Forwards to the underlying data trie and returns whatever it stores for `word`.
+string Pinyin4cppDictTrie::Find(const string &word) const {
+    return m_DataTrie.Find(word);
+}
+
+// True when the data trie returns a non-empty value for `word`.
+bool Pinyin4cppDictTrie::Contains(string &word) {
+    return m_DataTrie.Find(word) != string();
+}
+
+// True when the entry stored for `word` lists more than one reading.
+// LoadSingleWordDict() joins alternative readings with ",", so an entry is
+// multi-tone exactly when that separator occurs in it. Unknown words (empty
+// lookup result) are not multi-tone. The previous test was inverted and
+// answered true for single-reading and unknown words.
+bool Pinyin4cppDictTrie::IsMultiTone(const string &word) {
+    const string result = m_DataTrie.Find(word);
+    return result.find(",") != string::npos;
+}
+
+// Combined size of the source dictionaries as recorded by Init().
+size_t Pinyin4cppDictTrie::GetTotalDictSize() const {
+    return m_TotalDictSize_;
+}
+
+// Prepares the data trie: reuse the cache file if it matches the current
+// dictionaries, otherwise parse both dictionaries and build a new cache.
+void Pinyin4cppDictTrie::Init(const string &single_word_dict_path, const string &words_dict_paths, string dat_cache_path) {
+    // One md5 over both dictionaries identifies the cache contents.
+    size_t total_bytes = 0;
+    const string digest = CalcFileListMD5(single_word_dict_path + "|" + words_dict_paths, total_bytes);
+    m_TotalDictSize_ = total_bytes;
+
+    // When no cache location is given, default to a file under /tmp.
+    // NOTE(review): a predictable name in a world-writable directory - confirm this is acceptable.
+    if (dat_cache_path.empty()) {
+        dat_cache_path = "/tmp/" + digest + ".dat_cache";
+    }
+    qDebug() << "#####Pinyin Dict path:" << dat_cache_path.c_str();
+
+    const bool attached = m_DataTrie.InitAttachDat(dat_cache_path, digest);
+    if (!attached) {
+        // No usable cache: load the dictionaries and build one.
+        LoadSingleWordDict(single_word_dict_path);
+        LoadWordsDict(words_dict_paths);
+        bool build_ret = m_DataTrie.InitBuildDat(m_StaticNodeInfos, dat_cache_path, digest);
+        assert(build_ret);
+        // The parsed entries are no longer needed once the cache exists;
+        // hand the freed memory back to the system.
+        m_StaticNodeInfos.clear();
+        malloc_trim(0);
+    }
+}
+
+// Parses the single-character dictionary into m_StaticNodeInfos.
+//
+// Each non-comment line has SINGLE_WORD_DICT_COLUMN_NUM ':'-separated
+// columns; column 1 is the reading and column 2 the character used as key.
+// Readings of the same character are collected into one comma-separated
+// value, without duplicates. The previous code appended the character itself
+// (buf[2]) instead of the new reading (buf[1]).
+void Pinyin4cppDictTrie::LoadSingleWordDict(const string &filePath) {
+    ifstream ifs(filePath.c_str());
+    string line;
+    vector<string> buf;
+
+    while (getline(ifs, line)) {
+        if (limonp::StartsWith(line, "#")) {
+            continue; // comment line
+        }
+        limonp::Split(line, buf, ":");
+        assert(buf.size() == SINGLE_WORD_DICT_COLUMN_NUM);
+        if (buf.size() != SINGLE_WORD_DICT_COLUMN_NUM) {
+            continue; // release builds: skip malformed lines instead of indexing out of range
+        }
+
+        const string &pinyin = buf[1];
+        const string &word = buf[2];
+
+        auto entry = m_StaticNodeInfos.find(word);
+        if (entry == m_StaticNodeInfos.end()) {
+            m_StaticNodeInfos[word] = pinyin;
+            continue;
+        }
+
+        // Character already known: add the reading unless it is listed already.
+        vector<string> known;
+        limonp::Split(entry->second, known, ",");
+        bool isfind(false);
+        for (auto &onePinyin : known) {
+            if (onePinyin == pinyin) {
+                isfind = true;
+                break;
+            }
+        }
+        if (!isfind) {
+            entry->second += ("," + pinyin);
+        }
+    }
+}
+
+// Parses the word dictionary into m_StaticNodeInfos.
+// Each non-comment line has WORDS_DICT_COLUMN_NUM ':'-separated columns:
+// the word (key) and its pinyin. Alternative pinyin of the same word are
+// joined with '/', without duplicates.
+void Pinyin4cppDictTrie::LoadWordsDict(const string &filePath) {
+    ifstream input(filePath.c_str());
+    string row;
+    vector<string> columns;
+
+    while (getline(input, row)) {
+        if (limonp::StartsWith(row, "#")) {
+            continue; // comment line
+        }
+        limonp::Split(row, columns, ":");
+        assert(columns.size() == WORDS_DICT_COLUMN_NUM);
+
+        auto entry = m_StaticNodeInfos.find(columns[0]);
+        if (entry == m_StaticNodeInfos.end()) {
+            // First time this word is seen.
+            m_StaticNodeInfos[columns[0]] = columns[1];
+            continue;
+        }
+
+        // Word already present: append the pinyin only if it is new.
+        vector<string> existing;
+        limonp::Split(entry->second, existing, "/");
+        bool already_listed = false;
+        for (size_t i = 0; i < existing.size() && !already_listed; ++i) {
+            already_listed = (existing[i] == columns[1]);
+        }
+        if (!already_listed) {
+            entry->second += ("/" + columns[1]);
+        }
+    }
+}
+
+// Feeds the contents of every readable, non-empty file named in `files_list`
+// into one MD5 and returns its digest string. `files_list` is split with the
+// "|;" pattern. `file_size_sum` receives the total number of bytes hashed.
+// Files that cannot be opened are skipped.
+string CalcFileListMD5(const string &files_list, size_t &file_size_sum) {
+    limonp::MD5 hasher;
+
+    const auto paths = limonp::Split(files_list, "|;");
+    file_size_sum = 0;
+
+    for (auto const & path : paths) {
+        const int fd = open(path.c_str(), O_RDONLY);
+        if (fd < 0){
+            continue;
+        }
+
+        // Seeking to the end yields the file size.
+        auto const bytes = lseek(fd, 0, SEEK_END);
+        if (bytes <= 0) {
+            close(fd);
+            continue;
+        }
+
+        // Map the file so it can be hashed in a single Update() call.
+        void * mapped = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
+        assert(MAP_FAILED != mapped);
+
+        hasher.Update((unsigned char *) mapped, bytes);
+        file_size_sum += bytes;
+
+        munmap(mapped, bytes);
+        close(fd);
+    }
+
+    hasher.Final();
+    return string(hasher.digestChars);
+}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.h
@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef PINYIN4cpp_DICTTRIE_H
+#define PINYIN4cpp_DICTTRIE_H
+
+#include "pinyin4cpp_dataTrie.h"
+using namespace std;
+
+// Number of ':'-separated columns expected on each line of the
+// single-character dictionary and of the word dictionary respectively.
+const size_t SINGLE_WORD_DICT_COLUMN_NUM = 3;
+const size_t WORDS_DICT_COLUMN_NUM = 2;
+
+// Pinyin dictionary front end: loads the two source dictionaries (or an
+// existing cache built from them) into a Pinyin4cppDataTrie and answers
+// lookups against it.
+class Pinyin4cppDictTrie {
+public:
+    // Initializes the trie from the given dictionaries; see Init().
+    Pinyin4cppDictTrie(const string& single_word_dict_path, const string& words_dict_paths, const string & dat_cache_path = "");
+
+    ~Pinyin4cppDictTrie() {}
+
+    // Stored value for `word` (empty string if none).
+    string Find(const string &word) const;
+
+    // Whether a non-empty value is stored for `word`.
+    bool Contains(string &word);
+    // Polyphony test based on the ',' separator in the stored value.
+    bool IsMultiTone(const string &word);
+
+    // Total size of the source dictionaries, as summed while hashing them.
+    size_t GetTotalDictSize() const;
+
+private:
+    // Attaches to a matching cache file or builds one from the dictionaries.
+    void Init(const string& single_word_dict_path, const string& words_dict_paths, string dat_cache_path);
+
+    // Parse one dictionary file into m_StaticNodeInfos.
+    void LoadSingleWordDict(const string& filePath);
+
+    void LoadWordsDict(const string& filePath);
+
+private:
+    // Parsed key -> pinyin entries; only populated while a cache is being built.
+    map<string, string> m_StaticNodeInfos;
+
+    size_t m_TotalDictSize_ = 0;
+    Pinyin4cppDataTrie m_DataTrie;
+
+};
+
+// MD5 over the files listed in `files_list`; `file_size_sum` receives the
+// total number of bytes hashed.
+// NOTE(review): declared inline here but defined in pinyin4cpp_dictTrie.cpp,
+// so it is only usable from that translation unit - confirm this is intended.
+inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum);
+
+#endif //PINYIN4cpp_DICTTRIE_H
--- a/libchinese-segmentation/pinyinmanager.cpp
+++ b/libchinese-segmentation/pinyinmanager.cpp
@ -1,55 +0,0 @@
-#include "pinyinmanager.h"
-#include <mutex>
-PinYinManager * PinYinManager::g_pinYinManager = nullptr;
-std::once_flag g_singleFlag;
-PinYinManager * PinYinManager::getInstance()
-{
-    call_once(g_singleFlag, []() {
-        g_pinYinManager = new PinYinManager;
-    });
-    return g_pinYinManager;
-}
-
-bool PinYinManager::contains(string &word)
-{
-    return m_pinYinTrie->contains(word);
-}
-
-bool PinYinManager::isMultiTon(string &word)
-{
-    return m_pinYinTrie->isMultiTone(word);
-}
-
-bool PinYinManager::isMultiTon(string word)
-{
-    return m_pinYinTrie->isMultiTone(word);
-}
-
-int PinYinManager::getResults(string word, QStringList &results)
-{
-    results.clear();
-    if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
-        return 0;
-    }
-    QString tmp;
-    if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
-        results.append(tmp);
-        return 0;
-    }
-    return -1;
-}
-
-PinYinManager::PinYinManager()
-{
-    const char * const  PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
-    m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
-}
-
-PinYinManager::~PinYinManager()
-{
-    if (m_pinYinTrie){
-        delete m_pinYinTrie;
-        m_pinYinTrie = nullptr;
-    }
-}
-
--- a/libchinese-segmentation/pinyinmanager.h
+++ b/libchinese-segmentation/pinyinmanager.h
@ -1,33 +0,0 @@
-#ifndef PINYINMANAGER_H
-#define PINYINMANAGER_H
-
-#include <QtCore/qglobal.h>
-#include "cppjieba/PinYinTrie.hpp"
-
-#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
-
-using namespace std;
-
-class PINYINMANAGER_EXPORT PinYinManager
-{
-public:
-    static PinYinManager * getInstance();
-
-public:
-    bool contains(string &word);
-    bool isMultiTon(string &word);
-    bool isMultiTon(string word);
-
-    int getResults(string word, QStringList &results);
-
-protected:
-    PinYinManager();
-    ~PinYinManager();
-
-private:
-    static PinYinManager *g_pinYinManager;
-    cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
-
-};
-
-#endif // PINYINMANAGER_H
--- a/libchinese-segmentation/storage-base/cedar/cedar.h
+++ b/libchinese-segmentation/storage-base/cedar/cedar.h
@ -0,0 +1,682 @@
+// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
+//  $Id: cedar.h 1938 2022-03-17 16:22:30Z ynaga $
+// Copyright (c) 2009-2015 Naoki Yoshinaga <ynaga@tkl.iis.u-tokyo.ac.jp>
+#ifndef CEDAR_H
+#define CEDAR_H
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cassert>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+// Compile-time assertion without static_assert: when `e` is false the
+// typedef names an array of size -1, which does not compile. `msg` must be a
+// valid identifier because it becomes the typedef name.
+#define STATIC_ASSERT(e, msg) typedef char msg[(e) ? 1 : -1]
+
+namespace cedar {
+  // typedefs
+  typedef unsigned char  uchar;
+  // NaN<T>::N1 / N2 supply the default NO_VALUE / NO_PATH template arguments of `da`.
+  template <typename T> struct NaN { enum { N1 = -1, N2 = -2 }; };
+  // Distinct sentinels for float values (presumably NaN bit patterns - TODO confirm).
+  template <> struct NaN <float> { enum { N1 = 0x7f800001, N2 = 0x7f800002 }; };
+  static const int MAX_ALLOC_SIZE = 1 << 16; // must be divisible by 256
+  // dynamic double array
+  template <typename value_type,
+            const int     NO_VALUE  = NaN <value_type>::N1,
+            const int     NO_PATH   = NaN <value_type>::N2,
+            const bool    ORDERED   = true,
+            const int     MAX_TRIAL = 1,
+            const size_t  NUM_TRACKING_NODES = 0>
+  class da {
+  public:
+    // Sentinel results of the lookup methods; the first two mirror the
+    // NO_VALUE / NO_PATH template arguments.
+    enum error_code { CEDAR_NO_VALUE = NO_VALUE, CEDAR_NO_PATH = NO_PATH, CEDAR_VALUE_LIMIT = 2147483647 };
+    typedef value_type result_type;
+    // Result record carrying a value together with a key length.
+    struct result_pair_type {
+      value_type  value;
+      size_t      length;  // prefix length
+    };
+    // Result record that additionally carries the id of the node holding the value.
+    struct result_triple_type { // for predict ()
+      value_type  value;
+      size_t      length;  // suffix length
+      size_t      id;      // node id of value
+    };
+    // One double-array unit. `base_` and `value` share storage through the
+    // anonymous union.
+    struct node {
+      union { int base_; value_type value; }; // negative means prev empty index
+      int  check;                             // negative means next empty index
+      node (const int base__ = 0, const int check_ = 0)
+        : base_ (base__), check (check_) {}
+      // Decoded base: stored as -(base + 1) in the reduced trie, verbatim otherwise.
+#ifdef USE_REDUCED_TRIE
+      int base () const { return - (base_ + 1); } // ~ in two's complement system
+#else
+      int base () const { return base_; }
+#endif
+    };
+    // Per-node sibling/child labels kept alongside the node array.
+    struct ninfo {  // x1.5 update speed; +.25 % memory (8n -> 10n)
+      uchar  sibling;   // right sibling (= 0 if not exist)
+      uchar  child;     // first child
+      ninfo () : sibling (0), child (0) {}
+    };
+    // Bookkeeping for one group of 256 consecutive nodes; blocks are linked
+    // through prev/next.
+    struct block { // a block w/ 256 elements
+      int   prev;   // prev block; 3 bytes
+      int   next;   // next block; 3 bytes
+      short num;    // # empty elements; 0 - 256
+      short reject; // minimum # branching failed to locate; soft limit
+      int   trial;  // # trial
+      int   ehead;  // first empty item
+      // A new block starts with all 256 slots empty.
+      block () : prev (0), next (0), num (256), reject (257), trial (0), ehead (0) {}
+    };
+    // Zeroes every member, checks at compile time that value_type fits into
+    // an int (values share storage with node::base_), then sets up the
+    // initial block via _initialize ().
+    da () : tracking_node (), _array (0), _ninfo (0), _block (0), _bheadF (0), _bheadC (0), _bheadO (0), _capacity (0), _size (0), _no_delete (false), _reject () {
+      STATIC_ASSERT(sizeof (value_type) <= sizeof (int),
+                    value_type_is_not_supported___maintain_a_value_array_by_yourself_and_store_its_index
+                    );
+      _initialize ();
+    }
+    // Frees the arrays without re-initializing (reuse = false).
+    ~da () { clear (false); }
+    // Size queries: capacity and size are counted in nodes, total_size is the
+    // node array in bytes, unit_size is the size of a single node in bytes.
+    size_t capacity   () const { return static_cast <size_t> (_capacity); }
+    size_t size       () const { return static_cast <size_t> (_size); }
+    size_t total_size () const { return sizeof (node) * _size; }
+    size_t unit_size  () const { return sizeof (node); }
+    // Number of nodes in [0, _size) whose check field is non-negative.
+    size_t nonzero_size () const {
+      size_t used = 0;
+      for (int idx = 0; idx < _size; ++idx) {
+        if (_array[idx].check < 0) continue; // negative check: skip
+        ++used;
+      }
+      return used;
+    }
+    // Counts the nodes that hold a value. Reduced trie: a used node with a
+    // non-negative value. Otherwise: a used node that is exactly the base ()
+    // slot of its parent, i.e. reached from the parent through label 0.
+    size_t num_keys () const {
+      size_t i = 0;
+      for (int to = 0; to < _size; ++to)
+#ifdef USE_REDUCED_TRIE
+        if (_array[to].check >= 0 && _array[to].value >= 0) ++i;
+#else
+        if (_array[to].check >= 0 && _array[_array[to].check].base () == to) ++i;
+#endif
+      return i;
+    }
+    // interface
+    // Exact lookup of a NUL-terminated key; the length is taken with strlen.
+    template <typename T>
+    T exactMatchSearch (const char* key) const
+    { return exactMatchSearch <T> (key, std::strlen (key)); }
+    // Exact lookup of the first `len` bytes of `key`, starting at node `from`.
+    // The search itself is done by _find (); a CEDAR_NO_PATH outcome is folded
+    // into CEDAR_NO_VALUE, so callers only see one "not found" code. The
+    // result is packed into T by _set_result ().
+    template <typename T>
+    T exactMatchSearch (const char* key, size_t len, size_t from = 0) const {
+      union { int i; value_type x; } b; // reinterpret the int result as value_type
+      size_t pos = 0;
+      b.i = _find (key, from, pos, len);
+      if (b.i == CEDAR_NO_PATH) b.i = CEDAR_NO_VALUE;
+      T result;
+      _set_result (&result, b.x, len, from);
+      return result;
+    }
+    // Common-prefix search over a NUL-terminated key.
+    template <typename T>
+    size_t commonPrefixSearch (const char* key, T* result, size_t result_len) const
+    { return commonPrefixSearch (key, result, result_len, std::strlen (key)); }
+    // Collects every stored key that is a prefix of key[0..len). At most
+    // `result_len` hits are written to `result`; the return value is the
+    // total number of hits, which may be larger.
+    template <typename T>
+    size_t commonPrefixSearch (const char* key, T* result, size_t result_len, size_t len, size_t from = 0) const {
+      size_t num = 0;
+      // The loop has no increment of its own: `pos` is passed to _find () by
+      // name and is presumably advanced there - TODO confirm.
+      for (size_t pos = 0; pos < len; ) {
+        union { int i; value_type x; } b;
+        b.i = _find (key, from, pos, pos + 1);
+        if (b.i == CEDAR_NO_VALUE) continue;     // no key ends here, keep going
+        if (b.i == CEDAR_NO_PATH)  return num;   // no longer prefix can match
+        if (num < result_len) _set_result (&result[num], b.x, pos, from);
+        ++num;
+      }
+      return num;
+    }
+    // predict key from double array
+    // NUL-terminated convenience overload.
+    template <typename T>
+    size_t commonPrefixPredict (const char* key, T* result, size_t result_len)
+    { return commonPrefixPredict (key, result, result_len, std::strlen (key)); }
+    // Enumerates the entries below the node reached by key[0..len): returns 0
+    // if that path does not exist, otherwise walks the subtree with
+    // begin () / next (). At most `result_len` entries are stored; the return
+    // value is the total number found.
+    template <typename T>
+    size_t commonPrefixPredict (const char* key, T* result, size_t result_len, size_t len, size_t from = 0) {
+      size_t num (0), pos (0), p (0);
+      if (_find (key, from, pos, len) == CEDAR_NO_PATH) return 0;
+      union { int i; value_type x; } b;
+      size_t root = from; // the walk must not climb above the prefix node
+      for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p, root)) {
+        if (num < result_len) _set_result (&result[num], b.x, p, from);
+        ++num;
+      }
+      return num;
+    }
+    // Reconstructs `len` key bytes ending at node `to` by walking the check
+    // (parent) links upwards. `key` must have room for len + 1 bytes; the
+    // terminator is written first, then the bytes are filled in back to front.
+    void suffix (char* key, size_t len, size_t to) const {
+      key[len] = '\0';
+      while (len--) {
+        const int from = _array[to].check;
+        // The edge label is parent base XOR child index.
+        key[len]
+          = static_cast <char> (_array[from].base () ^ static_cast <int> (to));
+        to = static_cast <size_t> (from);
+      }
+    }
+    // Lookup that takes `from` and `pos` by reference and hands them on to
+    // _find (). NUL-terminated convenience overload first.
+    value_type traverse (const char* key, size_t& from, size_t& pos) const
+    { return traverse (key, from, pos, std::strlen (key)); }
+    // Returns the raw _find () result reinterpreted as value_type; unlike
+    // exactMatchSearch (), NO_PATH is not folded into NO_VALUE here.
+    value_type traverse (const char* key, size_t& from, size_t& pos, size_t len) const {
+      union { int i; value_type x; } b;
+      b.i = _find (key, from, pos, len);
+      return b.x;
+    }
+    // No-op callback used by the update () overloads that take no callback argument.
+    struct empty_callback { void operator () (const int, const int) {} }; // dummy empty function
+    // Convenience overloads; each supplies defaults and forwards to the next
+    // more general update (): strlen for the length, node 0 / position 0 as
+    // the start, and an empty_callback as the callback.
+    value_type& update (const char* key)
+    { return update (key, std::strlen (key)); }
+    value_type& update (const char* key, size_t len, value_type val = value_type (0))
+    { size_t from (0), pos (0); return update (key, from, pos, len, val); }
+    value_type& update (const char* key, size_t& from, size_t& pos, size_t len, value_type val = value_type (0))
+    { empty_callback cf; return update (key, from, pos, len, val, cf); }
+    // Inserts key[pos..len) below node `from`, creating edges through
+    // _follow () as needed, and adds `val` to the value stored for the key.
+    // Returns a reference to that stored value. `cf` is handed to every
+    // _follow () call. A zero-length key at the root is rejected through
+    // _err (), which terminates the process.
+    template <typename T>
+    value_type& update (const char* key, size_t& from, size_t& pos, size_t len, value_type val, T& cf) {
+      if (! len && ! from)
+        _err (__FILE__, __LINE__, "failed to insert zero-length key\n");
+      // Rebuild the update bookkeeping if it is missing (e.g. after set_array ()).
+#ifndef USE_FAST_LOAD
+      if (! _ninfo || ! _block) restore ();
+#endif
+      for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
+           pos < len; ++pos) {
+#ifdef USE_REDUCED_TRIE
+        const value_type val_ = _array[from].value;
+        if (val_ >= 0 && val_ != CEDAR_VALUE_LIMIT) // always new; correct this!
+          { const int to = _follow (from, 0, cf); _array[to].value = val_; }
+#endif
+        from = static_cast <size_t> (_follow (from, key_[pos], cf));
+      }
+      // The value lives in the node reached through label 0.
+#ifdef USE_REDUCED_TRIE
+      const int to = _array[from].value >= 0 ? static_cast <int> (from) : _follow (from, 0, cf);
+      if (_array[to].value == CEDAR_VALUE_LIMIT) _array[to].value = 0;
+#else
+      const int to = _follow (from, 0, cf);
+#endif
+      return _array[to].value += val;
+    }
+    // easy-going erase () without compression
+    // Removes `key`; returns 0 on success and -1 when the key is not stored.
+    int erase (const char* key) { return erase (key, std::strlen (key)); }
+    int erase (const char* key, size_t len, size_t from = 0) {
+      size_t pos = 0;
+      const int i = _find (key, from, pos, len);
+      if (i == CEDAR_NO_PATH || i == CEDAR_NO_VALUE) return -1;
+      // `from` is passed to _find () by name; presumably it now names the
+      // key's last node - TODO confirm.
+      erase (from);
+      return 0;
+    }
+    // Removes the entry at node `from`: frees nodes from the value node
+    // upwards through _push_enode (), and stops after the first node that
+    // had a sibling, which is unlinked with _pop_sibling ().
+    void erase (size_t from) {
+      // _test ();
+#ifdef USE_REDUCED_TRIE
+      int e = _array[from].value >= 0 ? static_cast <int> (from) : _array[from].base () ^ 0;
+      from = static_cast <size_t> (_array[e].check);
+#else
+      int e = _array[from].base () ^ 0;
+#endif
+      bool flag = false; // have sibling
+      do {
+        const node& n = _array[from];
+        flag = _ninfo[n.base () ^ _ninfo[from].child].sibling;
+        if (flag) _pop_sibling (from, n.base (), static_cast <uchar> (n.base () ^ e));
+        _push_enode (e);
+         e = static_cast <int> (from);
+        from = static_cast <size_t> (_array[from].check);
+      } while (! flag);
+    }
+    // Bulk insert of `num` keys through update (). Without `len` the key
+    // lengths come from strlen; without `val` key i gets value_type (i).
+    // Always returns 0.
+    int build (size_t num, const char** key, const size_t* len = 0, const value_type* val = 0) {
+      for (size_t idx = 0; idx < num; ++idx) {
+        const size_t key_len = len ? len[idx] : std::strlen (key[idx]);
+        const value_type key_val = val ? val[idx] : value_type (idx);
+        update (key[idx], key_len, key_val);
+      }
+      return 0;
+    }
+    // Writes every stored entry into `result` by walking the trie with
+    // begin () / next (). `result` must hold at least num_keys () elements;
+    // running out of room ends in _err (), which terminates the process.
+    template <typename T>
+    void dump (T* result, const size_t result_len) {
+      union { int i; value_type x; } b;
+      size_t num (0), from (0), p (0);
+      for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p))
+        if (num < result_len)
+          _set_result (&result[num++], b.x, p, from);
+        else
+          _err (__FILE__, __LINE__, "dump() needs array of length = num_keys()\n");
+    }
+    // Writes the node array to `fn`. With USE_FAST_LOAD the block heads,
+    // ninfo and block arrays are also written to "<fn>.sbl".
+    // Returns 0, or -1 when a file cannot be opened.
+    // NOTE(review): fwrite () results are not checked, so a short write still returns 0.
+    int save (const char* fn, const char* mode = "wb") const {
+      // _test ();
+      FILE* fp = std::fopen (fn, mode);
+      if (! fp) return -1;
+      std::fwrite (_array, sizeof (node), static_cast <size_t> (_size), fp);
+      std::fclose (fp);
+#ifdef USE_FAST_LOAD
+      // "<fn>.sbl": strlen (fn) + 4 characters + terminator.
+      const char* const info
+        = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
+      fp = std::fopen (info, mode);
+      delete [] info; // resolve memory leak
+      if (! fp) return -1;
+      std::fwrite (&_bheadF, sizeof (int), 1, fp);
+      std::fwrite (&_bheadC, sizeof (int), 1, fp);
+      std::fwrite (&_bheadO, sizeof (int), 1, fp);
+      std::fwrite (_ninfo, sizeof (ninfo), static_cast <size_t> (_size), fp);
+      std::fwrite (_block, sizeof (block), static_cast <size_t> (_size >> 8), fp);
+      std::fclose (fp);
+#endif
+      return 0;
+    }
+    // Loads a node array from `fn`, starting `offset` bytes into the file.
+    // `size_` is the number of bytes to consider (counted from the start of
+    // the file); 0 means "up to the end of the file". With USE_FAST_LOAD the
+    // bookkeeping arrays are read from "<fn>.sbl" as well.
+    // Returns 0 on success and -1 on any I/O or size error. The stream is
+    // closed on every path, including the error returns. Allocation failure
+    // ends in _err (), which terminates the process.
+    int open (const char* fn, const char* mode = "rb",
+              const size_t offset = 0, size_t size_ = 0) {
+      FILE* fp = std::fopen (fn, mode);
+      if (! fp) return -1;
+      // get size
+      if (! size_) {
+        if (std::fseek (fp, 0, SEEK_END) != 0) { std::fclose (fp); return -1; }
+        const long end = std::ftell (fp);
+        if (end < 0) { std::fclose (fp); return -1; } // ftell () signals failure with -1
+        size_ = static_cast <size_t> (end);
+        if (std::fseek (fp, 0, SEEK_SET) != 0) { std::fclose (fp); return -1; }
+      }
+      if (size_ <= offset) { std::fclose (fp); return -1; }
+      // set array
+      clear (false);
+      size_ = (size_ - offset) / sizeof (node); // from here on: number of nodes
+      if (std::fseek (fp, static_cast <long> (offset), SEEK_SET) != 0) {
+        std::fclose (fp);
+        return -1;
+      }
+      _array = static_cast <node*>  (std::malloc (sizeof (node)  * size_));
+#ifdef USE_FAST_LOAD
+      _ninfo = static_cast <ninfo*> (std::malloc (sizeof (ninfo) * size_));
+      _block = static_cast <block*> (std::malloc (sizeof (block) * size_));
+      if (! _array || ! _ninfo || ! _block)
+#else
+      if (! _array)
+#endif
+        _err (__FILE__, __LINE__, "memory allocation failed\n");
+      // The buffers allocated above stay owned by the object on failure and
+      // are released by the next clear ().
+      if (size_ != std::fread (_array, sizeof (node), size_, fp)) {
+        std::fclose (fp);
+        return -1;
+      }
+      std::fclose (fp);
+      _size = static_cast <int> (size_);
+#ifdef USE_FAST_LOAD
+      const char* const info
+        = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
+      fp = std::fopen (info, mode);
+      delete [] info; // resolve memory leak
+      if (! fp) return -1;
+      std::fread (&_bheadF, sizeof (int), 1, fp);
+      std::fread (&_bheadC, sizeof (int), 1, fp);
+      std::fread (&_bheadO, sizeof (int), 1, fp);
+      if (size_ != std::fread (_ninfo, sizeof (ninfo), size_, fp) ||
+          size_ != std::fread (_block, sizeof (block), size_ >> 8, fp) << 8) {
+        std::fclose (fp);
+        return -1;
+      }
+      std::fclose (fp);
+      _capacity = _size;
+#endif
+      return 0;
+    }
+#ifndef USE_FAST_LOAD
+    // Recreates whichever of the block / ninfo arrays is missing and sets
+    // the capacity to the current size.
+    void restore () { // restore information to update
+      if (! _block) _restore_block ();
+      if (! _ninfo) _restore_ninfo ();
+      _capacity = _size;
+    }
+#endif
+    // Adopts a caller-owned node buffer of `size_` nodes. Any current
+    // contents are released first, and _no_delete is set so that clear ()
+    // will not free the adopted buffer.
+    void set_array (void* p, size_t size_ = 0) { // ad-hoc
+      clear (false);
+      _array = static_cast <node*> (p);
+      _size  = static_cast <int> (size_);
+      _no_delete = true;
+    }
+    // Read-only pointer to the node array.
+    const void* array () const { return _array; }
+    // Releases all arrays (the node array only if it is owned, i.e.
+    // _no_delete is not set) and zeroes the counters. With `reuse` the
+    // structure is re-initialized to an empty trie afterwards.
+    void clear (const bool reuse = true) {
+      if (_array && ! _no_delete) std::free (_array);
+      if (_ninfo) std::free (_ninfo);
+      if (_block) std::free (_block);
+      _array = 0; _ninfo = 0; _block = 0;
+      _bheadF = _bheadC = _bheadO = _capacity = _size = 0; // *
+      if (reuse) _initialize ();
+      _no_delete = false;
+    }
+    // return the first child for a tree rooted by a given node
+    // Descends along first children from `from`, adding the number of steps
+    // to `len`, and returns the value found there. Returns CEDAR_NO_PATH when
+    // started at the root of an empty trie. `from` is updated to the node
+    // where the descent stopped.
+    int begin (size_t& from, size_t& len) {
+#ifndef USE_FAST_LOAD
+      if (! _ninfo) _restore_ninfo ();
+#endif
+      int   base = _array[from].base ();
+      uchar c    = _ninfo[from].child;
+      if (! from && ! (c = _ninfo[base ^ c].sibling)) // bug fix
+        return CEDAR_NO_PATH; // no entry
+      for (; c; ++len) {
+        from = static_cast <size_t> (_array[from].base ()) ^ c;
+        c    = _ninfo[from].child;
+      }
+#ifdef USE_REDUCED_TRIE
+      if (_array[from].value >= 0) return _array[from].value;
+#endif
+      // c is 0 here, so this reads the node reached through label 0.
+      return _array[_array[from].base () ^ c].base_;
+    }
+    // return the next child if any
+    // Continues the walk started by begin (): climbs towards `root` until a
+    // node with a right sibling is found, then descends into that sibling
+    // with begin (). Returns CEDAR_NO_PATH once `root` is reached without
+    // finding one. `len` is kept in step with the depth.
+    int next (size_t& from, size_t& len, const size_t root = 0) {
+      uchar c = 0;
+#ifdef USE_REDUCED_TRIE
+      if (_array[from].value < 0)
+#endif
+        c = _ninfo[_array[from].base () ^ 0].sibling;
+      for (; ! c && from != root; --len) {
+        c = _ninfo[from].sibling;
+        from = static_cast <size_t> (_array[from].check);
+      }
+      return c ?
+        begin (from = static_cast <size_t> (_array[from].base ()) ^ c, ++len) :
+        CEDAR_NO_PATH;
+    }
+    // test the validity of double array for debug
+    // Recursively asserts that every child listed for `from` points back to
+    // it through its check field.
+    void test (const size_t from = 0) const {
+      const int base = _array[from].base ();
+      uchar c = _ninfo[from].child;
+      do {
+        if (from) assert (_array[base ^ c].check == static_cast <int> (from));
+        if (c  && _array[base ^ c].value < 0) // correct this
+          test (static_cast <size_t> (base ^ c));
+      } while ((c = _ninfo[base ^ c].sibling));
+    }
+    size_t tracking_node[NUM_TRACKING_NODES + 1]; // 0-terminated list of node ids that _resolve () rewrites when a node moves
+  private:
+    // currently disabled; implement these if you need
+    da (const da&);
+    da& operator= (const da&);
+    node*   _array;   // double-array nodes
+    ninfo*  _ninfo;   // per-node first-child / sibling labels
+    block*  _block;   // one record per 256 nodes
+    int     _bheadF;  // first block of Full;   0
+    int     _bheadC;  // first block of Closed; 0 if no Closed
+    int     _bheadO;  // first block of Open;   0 if no Open
+    int     _capacity; // number of nodes allocated
+    int     _size;     // number of nodes in use; grows by 256 per added block
+    int     _no_delete; // set by set_array (); clear () then does not free _array
+    short   _reject[257]; // indexed by a block's empty-node count; consulted in _push_enode () and _find_place ()
+    //
+    // print "cedar: <file> [<line>]: <msg>" to stderr and terminate with exit status 1
+    static void _err (const char* fn, const int ln, const char* msg) {
+      std::fprintf (stderr, "cedar: %s [%d]: %s", fn, ln, msg);
+      std::exit (1);
+    }
+    // resize p to hold size_n elements and default-initialize the elements
+    // from index size_p up to size_n; on allocation failure the old buffer is
+    // freed and the error is reported through _err ()
+    template <typename T>
+    static void _realloc_array (T*& p, const int size_n, const int size_p = 0) {
+      void* const grown = std::realloc (p, sizeof (T) * static_cast <size_t> (size_n));
+      if (! grown) {
+        std::free (p);
+        _err (__FILE__, __LINE__, "memory reallocation failed\n");
+      }
+      p = static_cast <T*> (grown);
+      static const T T0 = T ();
+      for (int i = size_p; i != size_n; ++i) p[i] = T0;
+    }
+    void _initialize () { // initialize the first special block
+      _realloc_array (_array, 256, 256); // size_p == size_n: nothing is default-filled, every node is assigned below
+      _realloc_array (_ninfo, 256);
+      _realloc_array (_block, 1);
+#ifdef USE_REDUCED_TRIE
+      _array[0] = node (-1, -1);
+#else
+      _array[0] = node (0, -1);
+#endif
+      for (int i = 1; i < 256; ++i) // nodes 1..255 are chained into a ring through negated neighbor indices
+        _array[i] = node (i == 1 ? -255 : - (i - 1), i == 255 ? -1 : - (i + 1));
+      _block[0].ehead = 1; // bug fix for erase
+      _capacity = _size = 256;
+      for (size_t i = 0 ; i <= NUM_TRACKING_NODES; ++i) tracking_node[i] = 0; // no node is tracked yet
+      for (short  i = 0; i <= 256; ++i) _reject[i] = i + 1;
+    }
+    // follow/create edge: return the child of `from` for `label`, creating it when it does not exist
+    template <typename T>
+    int _follow (size_t& from, const uchar& label, T& cf) {
+      int to = 0;
+      const int base = _array[from].base ();
+      if (base < 0 || _array[to = base ^ label].check < 0) { // negative base, or the target slot is empty
+        to = _pop_enode (base, label, static_cast <int> (from)); // take an empty node for the new child
+        _push_sibling (from, to ^ label, label, base >= 0);
+      } else if (_array[to].check != static_cast <int> (from)) // the slot belongs to another parent
+        to = _resolve (from, base, label, cf); // relocate one side of the conflict; may update `from`
+      return to;
+    }
+    // find key from double array; `from` and `pos` are advanced as far as the key could be followed
+    int _find (const char* key, size_t& from, size_t& pos, const size_t len) const {
+      for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
+           pos < len; ) { // follow link
+#ifdef USE_REDUCED_TRIE
+        if (_array[from].value >= 0) return CEDAR_NO_PATH;
+#endif
+        size_t to = static_cast <size_t> (_array[from].base ()); to ^= key_[pos]; // child index = base XOR label
+        if (_array[to].check != static_cast <int> (from)) return CEDAR_NO_PATH; // `to` is not a child of `from`
+        ++pos;
+        from = to;
+      }
+#ifdef USE_REDUCED_TRIE
+      if (_array[from].value >= 0) // get value from leaf; only allow integer key
+        return _array[from].value;
+#endif
+      const node n = _array[_array[from].base () ^ 0]; // label-0 child of the node reached
+      if (n.check != static_cast <int> (from)) return CEDAR_NO_VALUE; // path exists but no key ends here
+      return n.base_;
+    }
+#ifndef USE_FAST_LOAD
+    void _restore_ninfo () { // rebuild the child/sibling labels in _ninfo from the check links in _array
+      _realloc_array (_ninfo, _size);
+      for (int to = 0; to < _size; ++to) {
+        const int from = _array[to].check; // parent of `to`, or negative when the slot is empty
+        if (from < 0) continue; // skip empty node
+        const int base = _array[from].base ();
+        if (const uchar label = static_cast <uchar> (base ^ to)) // skip leaf
+          _push_sibling (static_cast <size_t> (from), base, label,
+                         ! from || _ninfo[from].child || _array[base ^ 0].check == from);
+      }
+    }
+    void _restore_block () { // rebuild _block and the three block lists by counting empty nodes per 256-node block
+      _realloc_array (_block, _size >> 8);
+      _bheadF = _bheadC = _bheadO = 0;
+      for (int bi (0), e (0); e < _size; ++bi) { // register blocks to full
+        block& b = _block[bi];
+        b.num = 0;
+        for (; e < (bi << 8) + 256; ++e) // scan the 256 nodes of block bi
+          if (_array[e].check < 0 && ++b.num == 1) b.ehead = e; // first empty node found becomes ehead
+        int& head_out = b.num == 1 ? _bheadC : (b.num == 0 ? _bheadF : _bheadO); // 1 empty: Closed, 0: Full, more: Open
+        _push_block (bi, head_out, ! head_out && b.num);
+      }
+    }
+#endif
+    // store a search hit into the caller-supplied result object; each
+    // overload fills only the fields its result type has
+    void _set_result (result_type* x, value_type r, size_t = 0, size_t = 0) const {
+      *x = r; // value only
+    }
+    void _set_result (result_pair_type* x, value_type r, size_t l, size_t = 0) const {
+      x->length = l;
+      x->value  = r;
+    }
+    void _set_result (result_triple_type* x, value_type r, size_t l, size_t from) const {
+      x->id     = from;
+      x->length = l;
+      x->value  = r;
+    }
+    // unlink block bi from the circular list whose head is head_in;
+    // last = true means bi is the only member, so the list becomes empty
+    void _pop_block (const int bi, int& head_in, const bool last) {
+      if (last) { // last one popped; Closed or Open
+        head_in = 0;
+        return;
+      }
+      const int before = _block[bi].prev;
+      const int after  = _block[bi].next;
+      _block[before].next = after;
+      _block[after].prev  = before;
+      if (head_in == bi) head_in = after;
+    }
+    void _push_block (const int bi, int& head_out, const bool empty) { // insert block bi into the circular list headed by head_out
+      block& b = _block[bi];
+      if (empty) { // the destination is empty
+        head_out = b.prev = b.next = bi; // bi becomes a one-element ring
+      } else { // use most recently pushed
+        int& tail_out = _block[head_out].prev; // reference to the old head's prev link, i.e. the current tail
+        b.prev = tail_out;
+        b.next = head_out;
+        head_out = tail_out = _block[tail_out].next = bi; // old tail's next, old head's prev and the head all become bi
+      }
+    }
+    int _add_block () { // append one block of 256 empty nodes and return its block index
+      if (_size == _capacity) { // allocate memory if needed
+#ifdef USE_EXACT_FIT
+        _capacity += _size >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : _size;
+#else
+        _capacity += _capacity;
+#endif
+        _realloc_array (_array, _capacity, _capacity); // new nodes are not default-filled; the new block is written below
+        _realloc_array (_ninfo, _capacity, _size);
+        _realloc_array (_block, _capacity >> 8, _size >> 8);
+      }
+      _block[_size >> 8].ehead = _size;
+      _array[_size] = node (- (_size + 255),  - (_size + 1)); // first node links back to the last node of the block
+      for (int i = _size + 1; i < _size + 255; ++i)
+        _array[i] = node (-(i - 1), -(i + 1));
+      _array[_size + 255] = node (- (_size + 254),  -_size); // last node links forward to the first, closing the ring
+      _push_block (_size >> 8, _bheadO, ! _bheadO); // append to block Open
+      _size += 256;
+      return (_size >> 8) - 1;
+    }
+    // move block bi out of the list headed by head_in and into the list
+    // headed by head_out
+    void _transfer_block (const int bi, int& head_in, int& head_out) {
+      const bool only_member = (_block[bi].next == bi);
+      _pop_block (bi, head_in, only_member);
+      const bool dest_empty = ! head_out && _block[bi].num;
+      _push_block (bi, head_out, dest_empty);
+    }
+    // pop empty node from block; never transfer the special block (bi = 0)
+    int _pop_enode (const int base, const uchar label, const int from) {
+      const int e  = base < 0 ? _find_place () : base ^ label; // no base yet: pick any empty node; else the slot is fixed
+      const int bi = e >> 8; // block that contains e
+      node&  n = _array[e];
+      block& b = _block[bi];
+      if (--b.num == 0) {
+        if (bi) _transfer_block (bi, _bheadC, _bheadF); // Closed to Full
+      } else { // release empty node from empty ring
+        _array[-n.base_].check = n.check;
+        _array[-n.check].base_ = n.base_;
+        if (e == b.ehead) b.ehead = -n.check; // set ehead
+        if (bi && b.num == 1 && b.trial != MAX_TRIAL) // Open to Closed
+          _transfer_block (bi, _bheadO, _bheadC);
+      }
+      // initialize the released node
+#ifdef USE_REDUCED_TRIE
+      n.value = CEDAR_VALUE_LIMIT; n.check = from;
+      if (base < 0) _array[from].base_ = - (e ^ label) - 1;
+#else
+      if (label) n.base_ = -1; else n.value = value_type (0); n.check = from; // n.check = from runs in both cases
+      if (base < 0) _array[from].base_ = e ^ label; // parent had no base: derive it from the chosen slot
+#endif
+      return e;
+    }
+    // push empty node into empty ring
+    void _push_enode (const int e) {
+      const int bi = e >> 8; // block that contains e
+      block& b = _block[bi];
+      if (++b.num == 1) { // Full to Closed
+        b.ehead = e;
+        _array[e] = node (-e, -e); // single-element ring: e links to itself
+        if (bi) _transfer_block (bi, _bheadF, _bheadC); // Full to Closed
+      } else {
+        const int prev = b.ehead;
+        const int next = -_array[prev].check;
+        _array[e] = node (-prev, -next); // insert e between ehead and ehead's successor
+        _array[prev].check = _array[next].base_ = -e;
+        if (b.num == 2 || b.trial == MAX_TRIAL) // Closed to Open
+          if (bi) _transfer_block (bi, _bheadC, _bheadO);
+        b.trial = 0;
+      }
+      if (b.reject < _reject[b.num]) b.reject = _reject[b.num];
+      _ninfo[e] = ninfo (); // reset ninfo; no child, no sibling
+    }
+    // push label to from's child: link `label` into the child/sibling chain of `from`
+    void _push_sibling (const size_t from, const int base, const uchar label, const bool flag = true) {
+      uchar* c = &_ninfo[from].child; // link that will receive `label`; starts at the first-child slot
+      if (flag && (ORDERED ? label > *c : ! *c))
+        do c = &_ninfo[base ^ *c].sibling; while (ORDERED && *c && *c < label); // ORDERED: advance past smaller labels
+      _ninfo[base ^ label].sibling = *c, *c = label; // splice: label takes over the old successor, the link now names label
+    }
+    // pop label from from's child: unlink `label` from the child/sibling
+    // chain of `from`, handing its successor to the link that pointed at it
+    void _pop_sibling (const size_t from, const int base, const uchar label) {
+      uchar* link = &_ninfo[from].child;
+      for (; *link != label; link = &_ninfo[base ^ *link].sibling) {}
+      *link = _ninfo[base ^ label].sibling;
+    }
+    // check whether to replace branching w/ the newly added node; both sibling chains are walked in lockstep
+    bool _consult (const int base_n, const int base_p, uchar c_n, uchar c_p) const {
+      do if (! (c_p = _ninfo[base_p ^ c_p].sibling)) return false; // chain of p ended first (or together)
+      while ((c_n = _ninfo[base_n ^ c_n].sibling));
+      return true; // chain of n ended while p still had siblings
+    }
+    // enumerate (equal to or more than one) child nodes into p; returns a pointer to the last label written
+    uchar* _set_child (uchar* p, const int base, uchar c, const int label = -1) {
+      --p; // pre-decrement so that every store below can use *++p
+      if (! c)  { *++p = c; c = _ninfo[base ^ c].sibling; } // 0: terminal
+      if (ORDERED)
+        while (c && c < label) { *++p = c; c = _ninfo[base ^ c].sibling; } // children smaller than the new label
+      if (label != -1) *++p = static_cast <uchar> (label); // -1 means no extra label is inserted
+      while (c) { *++p = c; c = _ninfo[base ^ c].sibling; } // remaining children
+      return p;
+    }
+    // explore new block to settle down: pick the empty node for a single
+    // child, preferring the head of a Closed block, then of an Open block;
+    // with neither available a new block is added
+    int _find_place () {
+      const int bi = _bheadC ? _bheadC : _bheadO;
+      if (bi) return _block[bi].ehead;
+      return _add_block () << 8;
+    }
+    int _find_place (const uchar* const first, const uchar* const last) { // find an empty node that fits all labels in [first, last]
+      if (int bi = _bheadO) {
+        const int   bz = _block[_bheadO].prev; // last block of the Open list; the scan stops after it
+        const short nc = static_cast <short> (last - first + 1); // number of labels to place
+        while (1) { // set candidate block
+          block& b = _block[bi];
+          if (b.num >= nc && nc < b.reject) // explore configuration
+            for (int e = b.ehead;;) { // try every empty node of the block as the slot for *first
+              const int base = e ^ *first;
+              for (const uchar* p = first; _array[base ^ *++p].check < 0; ) // remaining labels must land on empty nodes
+                if (p == last) return b.ehead = e; // no conflict
+              if ((e = -_array[e].check) == b.ehead) break; // walked the whole empty ring
+            }
+          b.reject = nc; // remember that nc labels did not fit in this block
+          if (b.reject < _reject[b.num]) _reject[b.num] = b.reject;
+          const int bi_ = b.next; // saved before the block may be moved to another list
+          if (++b.trial == MAX_TRIAL) _transfer_block (bi, _bheadO, _bheadC);
+          if (bi == bz) break;
+          bi = bi_;
+        };
+      }
+      return _add_block () << 8; // nothing fits: use the first node of a new block
+    }
+    // resolve conflict on base_n ^ label_n = base_p ^ label_p
+    template <typename T>
+    int _resolve (size_t& from_n, const int base_n, const uchar label_n, T& cf) {
+      // examine siblings of conflicted nodes
+      const int to_pn  = base_n ^ label_n; // the contested slot
+      const int from_p = _array[to_pn].check; // parent that currently owns the slot
+      const int base_p = _array[from_p].base ();
+      const bool flag // whether to replace siblings of newly added
+        = _consult (base_n, base_p, _ninfo[from_n].child, _ninfo[from_p].child);
+      uchar child[256];
+      uchar* const first = &child[0];
+      uchar* const last  =
+        flag ? _set_child (first, base_n, _ninfo[from_n].child, label_n)
+        : _set_child (first, base_p, _ninfo[from_p].child);
+      const int base =
+        (first == last ? _find_place () : _find_place (first, last)) ^ *first; // new base for the children being moved
+      // replace & modify empty list
+      const int from  = flag ? static_cast <int> (from_n) : from_p; // parent whose children are relocated
+      const int base_ = flag ? base_n : base_p; // that parent's old base
+      if (flag && *first == label_n) _ninfo[from].child = label_n; // new child
+#ifdef USE_REDUCED_TRIE
+      _array[from].base_ = -base - 1; // new base
+#else
+      _array[from].base_ = base; // new base
+#endif
+      for (const uchar* p = first; p <= last; ++p) { // to_ => to
+        const int to  = _pop_enode (base, *p, from);
+        const int to_ = base_ ^ *p;
+        _ninfo[to].sibling = (p == last ? 0 : *(p + 1)); // rebuild the sibling chain in enumeration order
+        if (flag && to_ == to_pn) continue; // skip newcomer (no child)
+        cf (to_, to); // user-defined callback function to handle moved nodes
+        node& n  = _array[to];
+        node& n_ = _array[to_];
+#ifdef USE_REDUCED_TRIE
+        if ((n.base_ = n_.base_) < 0 && *p) // copy base; bug fix
+#else
+        if ((n.base_ = n_.base_) > 0 && *p) // copy base; bug fix
+#endif
+          {
+            uchar c = _ninfo[to].child = _ninfo[to_].child;
+            do _array[n.base () ^ c].check = to; // adjust grand son's check
+            while ((c = _ninfo[n.base () ^ c].sibling));
+          }
+        if (! flag && to_ == static_cast <int> (from_n)) // parent node moved
+          from_n = static_cast <size_t> (to); // bug fix
+        if (! flag && to_ == to_pn) { // the address is immediately used
+          _push_sibling (from_n, to_pn ^ label_n, label_n);
+          _ninfo[to_].child = 0; // remember to reset child
+#ifdef USE_REDUCED_TRIE
+          n_.value = CEDAR_VALUE_LIMIT;
+#else
+          if (label_n) n_.base_ = -1; else n_.value = value_type (0);
+#endif
+          n_.check = static_cast <int> (from_n);
+        } else
+          _push_enode (to_); // the vacated slot goes back to the empty ring
+        if (NUM_TRACKING_NODES) // keep the traversed node updated
+          for (size_t j = 0; tracking_node[j] != 0; ++j)
+            if (tracking_node[j] == static_cast <size_t> (to_))
+              { tracking_node[j] = static_cast <size_t> (to); break; }
+      }
+      return flag ? base ^ label_n : to_pn; // index of the node created for label_n
+    }
+  };
+}
+#endif
--- a/libchinese-segmentation/storage-base/cedar/cedarpp.h
+++ b/libchinese-segmentation/storage-base/cedar/cedarpp.h
@ -0,0 +1,834 @@
+// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
+//  $Id: cedarpp.h 1916 2017-07-12 07:30:56Z ynaga $
+// Copyright (c) 2009-2015 Naoki Yoshinaga <ynaga@tkl.iis.u-tokyo.ac.jp>
+#ifndef CEDAR_H
+#define CEDAR_H
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <climits>
+#include <cassert>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define STATIC_ASSERT(e, msg) typedef char msg[(e) ? 1 : -1] // compile-time check: a false e declares an array of size -1, which is ill-formed
+
+namespace cedar {
+  // typedefs
+#if LONG_BIT == 64
+  typedef unsigned long       npos_t; // possibly compatible with size_t
+#else
+  typedef unsigned long long  npos_t;
+#endif
+  typedef unsigned char       uchar;
+  static const npos_t TAIL_OFFSET_MASK = static_cast <npos_t> (0xffffffff); // low 32 bits of an npos_t; and-ing with it drops the tail offset held in the high half
+  static const npos_t NODE_INDEX_MASK  = static_cast <npos_t> (0xffffffff) << 32; // high 32 bits of an npos_t
+  template <typename T> struct NaN { enum { N1 = -1, N2 = -2 }; }; // default NO_VALUE / NO_PATH sentinels
+  template <> struct NaN <float> { enum { N1 = 0x7f800001, N2 = 0x7f800002 }; }; // for float: two IEEE-754 NaN bit patterns
+  static const int MAX_ALLOC_SIZE = 1 << 16; // must be divisible by 256
+  // dynamic double array
+  template <typename value_type,
+            const int     NO_VALUE  = NaN <value_type>::N1,
+            const int     NO_PATH   = NaN <value_type>::N2,
+            const bool    ORDERED   = true,
+            const int     MAX_TRIAL = 1,
+            const size_t  NUM_TRACKING_NODES = 0>
+  class da {
+  public:
+    enum error_code { CEDAR_NO_VALUE = NO_VALUE, CEDAR_NO_PATH = NO_PATH }; // sentinel results, taken from the template parameters
+    typedef value_type result_type; // plain result: the value only
+    struct result_pair_type {
+      value_type  value;   // value stored for the key
+      size_t      length;  // prefix length
+    };
+    struct result_triple_type { // for predict ()
+      value_type  value;   // value stored for the key
+      size_t      length;  // suffix length
+      npos_t      id;      // node id of value
+    };
+    struct node { // one element of the double array
+      union { int base; value_type value; }; // negative means prev empty index
+      int  check;                            // negative means next empty index
+      node (const int base_ = 0, const int check_ = 0) // both fields default to 0
+        : base (base_), check (check_) {}
+    };
+    struct ninfo {  // x1.5 update speed; +25% memory (8n -> 10n)
+      uchar  sibling;   // right sibling (= 0 if not exist)
+      uchar  child;     // first child
+      ninfo () : sibling (0), child (0) {} // starts with no sibling and no child
+    };
+    struct block { // a block w/ 256 elements
+      int   prev;   // prev block; 3 bytes
+      int   next;   // next block; 3 bytes
+      short num;    // # empty elements; 0 - 256
+      short reject; // minimum # branching failed to locate; soft limit
+      int   trial;  // # trial
+      int   ehead;  // first empty item
+      block () : prev (0), next (0), num (256), reject (257), trial (0), ehead (0) {} // a fresh block has all 256 elements empty
+    };
+    // construct an empty trie: every pointer and counter starts at zero and
+    // _initialize () then sets up the initial state
+    da () : tracking_node (), _array (0), _tail (0), _tail0 (0), _ninfo (0), _block (0), _bheadF (0), _bheadC (0), _bheadO (0), _capacity (0), _size (0), _quota (0), _quota0 (0), _no_delete (false), _reject () {
+      // the typedef produced by STATIC_ASSERT is never used; silence that
+      // warning only around the assertion and restore the includer's own
+      // diagnostic state afterwards instead of forcing the warning back on
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
+      STATIC_ASSERT(sizeof (value_type) <= sizeof (int),
+                    value_type_is_not_supported___maintain_a_value_array_by_yourself_and_store_its_index_to_trie
+                    );
+#pragma GCC diagnostic pop
+      _initialize ();
+    }
+    ~da () { clear (false); } // release the buffers without re-initializing
+    size_t capacity   () const { return static_cast <size_t> (_capacity); } // value of _capacity
+    size_t size       () const { return static_cast <size_t> (_size); } // number of nodes in the array
+    size_t length     () const { return static_cast <size_t> (*_length); } // tail length as recorded in *_length
+    size_t total_size () const { return sizeof (node) * _size; } // bytes occupied by the _size nodes
+    size_t unit_size  () const { return sizeof (node); } // bytes per node
+    // count the nodes whose check is non-negative
+    size_t nonzero_size () const {
+      size_t used = 0;
+      int to = 0;
+      while (to < _size) {
+        if (_array[to].check >= 0) ++used;
+        ++to;
+      }
+      return used;
+    }
+    // total bytes of the tail strings referenced from the trie: the
+    // characters of each referenced tail plus, per tail, one terminator byte
+    // and one value_type
+    size_t nonzero_length () const {
+      size_t chars = 0;
+      size_t tails = 0;
+      for (int to = 0; to < _size; ++to) {
+        const node& n = _array[to];
+        if (n.check < 0) continue;               // empty slot
+        if (_array[n.check].base == to) continue; // label-0 child: holds a value, not a tail
+        if (n.base >= 0) continue;               // no tail attached
+        ++tails;
+        chars += std::strlen (&_tail[-n.base]);
+      }
+      return chars + tails * (1 + sizeof (value_type));
+    }
+    // count the keys: every used node that is either the label-0 child of
+    // its parent or carries a negative base (a tail reference)
+    size_t num_keys () const {
+      size_t keys = 0;
+      for (int to = 0; to < _size; ++to) {
+        const node& n = _array[to];
+        if (n.check < 0) continue; // empty slot
+        if (_array[n.check].base == to || n.base < 0) ++keys;
+      }
+      return keys;
+    }
+    // interface
+    // look up a NUL-terminated key; see the overload below
+    template <typename T>
+    T exactMatchSearch (const char* key) const {
+      const size_t len = std::strlen (key);
+      return exactMatchSearch <T> (key, len);
+    }
+    // look up key[0 .. len) starting at node `from`; a missing path is
+    // reported the same way as a missing value (CEDAR_NO_VALUE)
+    template <typename T>
+    T exactMatchSearch (const char* key, size_t len, npos_t from = 0) const {
+      size_t pos = 0;
+      union { int i; value_type x; } found;
+      found.i = _find (key, from, pos, len);
+      if (found.i == CEDAR_NO_PATH) {
+        found.i = CEDAR_NO_VALUE;
+      }
+      T result;
+      _set_result (&result, found.x, len, from);
+      return result;
+    }
+    // report every stored key that is a prefix of a NUL-terminated key
+    template <typename T>
+    size_t commonPrefixSearch (const char* key, T* result, size_t result_len) const {
+      const size_t len = std::strlen (key);
+      return commonPrefixSearch (key, result, result_len, len);
+    }
+    // report every stored key that is a prefix of key[0 .. len); at most
+    // result_len hits are written, but the return value counts all of them
+    template <typename T>
+    size_t commonPrefixSearch (const char* key, T* result, size_t result_len, size_t len, npos_t from = 0) const {
+      size_t hits = 0;
+      size_t pos  = 0;
+      while (pos < len) {
+        union { int i; value_type x; } b;
+        b.i = _find (key, from, pos, pos + 1); // look one more character ahead
+        if (b.i == CEDAR_NO_VALUE) continue;   // no key ends here
+        if (b.i == CEDAR_NO_PATH)  return hits; // the key leaves the trie
+        if (hits < result_len) {
+          _set_result (&result[hits], b.x, pos, from);
+        }
+        ++hits;
+      }
+      return hits;
+    }
+    // predict key from double array: enumerate the stored keys that start
+    // with a NUL-terminated key
+    template <typename T>
+    size_t commonPrefixPredict (const char* key, T* result, size_t result_len) {
+      const size_t len = std::strlen (key);
+      return commonPrefixPredict (key, result, result_len, len);
+    }
+    // enumerate the stored keys that start with key[0 .. len); at most
+    // result_len hits are written, but the return value counts all of them
+    template <typename T>
+    size_t commonPrefixPredict (const char* key, T* result, size_t result_len, size_t len, npos_t from = 0) {
+      size_t pos = 0;
+      if (_find (key, from, pos, len) == CEDAR_NO_PATH) return 0;
+      const npos_t root = from; // enumeration must not climb above this node
+      size_t hits  = 0;
+      size_t depth = 0;
+      union { int i; value_type x; } b;
+      b.i = begin (from, depth);
+      while (b.i != CEDAR_NO_PATH) {
+        if (hits < result_len) {
+          _set_result (&result[hits], b.x, depth, from);
+        }
+        ++hits;
+        b.i = next (from, depth, root);
+      }
+      return hits;
+    }
+    void suffix (char* key, size_t len, npos_t to) const { // write the len characters that end at node id `to` into key, NUL-terminated
+      key[len] = '\0'; // key must have room for len + 1 bytes
+      if (const int offset = static_cast <int> (to >> 32)) { // high half set: the id ends inside a tail
+        to &= TAIL_OFFSET_MASK; // keep only the node index
+        size_t len_tail = std::strlen (&_tail[-_array[to].base]);
+        if (len > len_tail) len -= len_tail; else len_tail = len, len = 0; // copy at most len bytes from the tail
+        std::memcpy (&key[len], &_tail[static_cast <size_t> (offset) - len_tail], len_tail);
+      }
+      while (len--) { // remaining characters: climb the trie and recover each label
+        const int from = _array[to].check;
+        key[len] = static_cast <char> (_array[from].base ^ static_cast <int> (to)); // label = parent base XOR child index
+        to = static_cast <npos_t> (from);
+      }
+    }
+    // look up a NUL-terminated key, resuming from node `from` / position `pos`
+    value_type traverse (const char* key, npos_t& from, size_t& pos) const {
+      const size_t len = std::strlen (key);
+      return traverse (key, from, pos, len);
+    }
+    // look up key[pos .. len) from node `from`; `from` and `pos` are passed
+    // by reference to _find (), and its int result is returned reinterpreted
+    // as value_type
+    value_type traverse (const char* key, npos_t& from, size_t& pos, size_t len) const {
+      union { int i; value_type x; } raw;
+      raw.i = _find (key, from, pos, len);
+      return raw.x;
+    }
+    struct empty_callback { void operator () (const int, const int) {} }; // no-op callback; update () passes it when the caller supplies none
+    // convenience overloads of update (); each fills in defaults and
+    // forwards to the next more general one
+    value_type& update (const char* key) {
+      const size_t len = std::strlen (key);
+      return update (key, len);
+    }
+    value_type& update (const char* key, size_t len, value_type val = value_type (0)) {
+      npos_t from = 0; // start at the root
+      size_t pos  = 0; // start at the first character
+      return update (key, from, pos, len, val);
+    }
+    value_type& update (const char* key, npos_t& from, size_t& pos, size_t len, value_type val = value_type (0)) {
+      empty_callback cf; // no notification for moved nodes
+      return update (key, from, pos, len, val, cf);
+    }
+    template <typename T>
+    value_type& update (const char* key, npos_t& from, size_t& pos, size_t len, value_type val, T& cf) { // insert key[pos .. len) below `from`, add val to its value and return a reference to it
+      if (! len && ! from)
+        _err (__FILE__, __LINE__, "failed to insert zero-length key\n");
+#ifndef USE_FAST_LOAD
+      if (! _ninfo || ! _block) restore ();
+#endif
+      npos_t offset = from >> 32; // high half of `from`: offset into the tail, 0 when on the trie
+      if (! offset) { // node on trie
+        for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
+             _array[from].base >= 0; ++pos) { // walk / extend the trie while the node has a non-negative base
+          if (pos == len) // could be reduced
+            { const int to = _follow (from, 0, cf); return _array[to].value += val; }
+          from = static_cast <size_t> (_follow (from, key_[pos], cf));
+        }
+        offset = static_cast <npos_t> (-_array[from].base); // negative base: its negation is the tail offset
+      }
+      if (offset >= sizeof (int)) { // go to _tail
+        const size_t pos_orig = pos;
+        char* const tail = &_tail[offset] - pos; // biased so that tail[pos] lines up with key[pos]
+        while (pos < len && key[pos] == tail[pos]) ++pos; // length of the common prefix
+        // the key matches the stored tail completely
+        if (pos == len && tail[pos] == '\0') { // found exact key
+          if (const npos_t moved = pos - pos_orig) { // search end on tail
+            from &= TAIL_OFFSET_MASK;
+            from |= (offset + moved) << 32;
+          }
+          return *reinterpret_cast <value_type*> (&tail[len + 1]) += val; // the value is stored right after the terminator
+        }
+        // otherwise, insert the common prefix in tail if any
+        if (from >> 32) {
+          from &= TAIL_OFFSET_MASK; // reset to update tail offset
+          for (npos_t offset_ = static_cast <npos_t> (-_array[from].base);
+               offset_ < offset; ) { // turn the tail characters before `offset` into trie nodes
+            from = static_cast <size_t>
+                   (_follow (from, static_cast <uchar> (_tail[offset_]), cf));
+            ++offset_;
+            // this shows intricacy in debugging updatable double array trie
+            if (NUM_TRACKING_NODES) // keep the traversed node (on tail) updated
+              for (size_t j = 0; tracking_node[j] != 0; ++j)
+                if (tracking_node[j] >> 32 == offset_)
+                  tracking_node[j] = static_cast <npos_t> (from);
+          }
+        }
+        for (size_t pos_ = pos_orig; pos_ < pos; ++pos_) // turn the common prefix into trie nodes
+          from = static_cast <size_t>
+                 (_follow (from, static_cast <uchar> (key[pos_]), cf));
+        npos_t moved = pos - pos_orig;
+        if (tail[pos]) { // remember to move offset to existing tail
+          const int to_ = _follow (from, static_cast <uchar> (tail[pos]), cf);
+          _array[to_].base = - static_cast <int> (offset + ++moved);
+          moved -= 1 + sizeof (value_type); // keep record
+        }
+        moved += offset;
+        for (npos_t i = offset; i <= moved; i += 1 + sizeof (value_type)) { // record tail offsets in _tail0; the reuse branch below takes them back
+          if (_quota0 == ++*_length0) {
+#ifdef USE_EXACT_FIT
+            _quota0 += *_length0 >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : *_length0;
+#else
+            _quota0 += _quota0;
+#endif
+            _realloc_array (_tail0, _quota0, *_length0);
+          }
+          _tail0[*_length0] = static_cast <int> (i);
+        }
+        if (pos == len || tail[pos] == '\0') {
+          const int to = _follow (from, 0, cf); // could be reduced
+          if (pos == len) return _array[to].value += val; // set value on trie
+          _array[to].value += *reinterpret_cast <value_type*> (&tail[pos + 1]); // old key ended here: carry its value onto the trie
+        }
+        from = static_cast <size_t> (_follow (from, static_cast <uchar> (key[pos]), cf));
+        ++pos;
+      }
+      const int needed = static_cast <int> (len - pos + 1 + sizeof (value_type)); // remaining characters + terminator + value
+      if (pos == len && *_length0) { // reuse
+        const int offset0 = _tail0[*_length0];
+        _tail[offset0] = '\0';
+        _array[from].base = -offset0;
+        --*_length0;
+        return *reinterpret_cast <value_type*> (&_tail[offset0 + 1]) = val;
+      }
+      if (_quota < *_length + needed) { // grow the tail buffer
+#ifdef USE_EXACT_FIT
+        _quota += needed > *_length || needed > MAX_ALLOC_SIZE ? needed :
+                  (*_length >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : *_length);
+#else
+        _quota += _quota >= needed ? _quota : needed;
+#endif
+        _realloc_array (_tail, _quota, *_length);
+      }
+      _array[from].base = -*_length; // the node now refers to the new tail entry
+      const size_t pos_orig = pos;
+      char* const tail = &_tail[*_length] - pos; // biased so that tail[pos] lines up with key[pos]
+      if (pos < len) {
+        do tail[pos] = key[pos]; while (++pos < len); // copy the rest of the key into the tail
+        from |= (static_cast <npos_t> (*_length) + (len - pos_orig)) << 32;
+      }
+      *_length += needed;
+      return *reinterpret_cast <value_type*> (&tail[len + 1]) += val; // the value slot follows the terminator
+    }
+    // easy-going erase () without compression
+    int erase (const char* key) { return erase (key, std::strlen (key)); }
+    int erase (const char* key, size_t len, npos_t from = 0) {
+      size_t pos = 0;
+      const int i = _find (key, from, pos, len);
+      if (i == CEDAR_NO_PATH || i == CEDAR_NO_VALUE) return -1;
+      if (from >> 32) from &= TAIL_OFFSET_MASK; // leave tail as is
+      bool flag = _array[from].base < 0; // have sibling
+      int e = flag ? static_cast <int> (from) : _array[from].base ^ 0;
+      from  = _array[e].check;
+      do {
+        const node& n = _array[from];
+        flag = _ninfo[n.base ^ _ninfo[from].child].sibling;
+        if (flag) _pop_sibling (from, n.base, static_cast <uchar> (n.base ^ e));
+        _push_enode (e);
+        e = static_cast <int> (from);
+        from = static_cast <size_t> (_array[from].check);
+      } while (! flag);
+      return 0;
+    }
+    // insert num keys through update (); a null len means the keys are
+    // NUL-terminated, a null val stores each key's index as its value.
+    // always returns 0
+    int build (size_t num, const char** key, const size_t* len = 0, const value_type* val = 0) {
+      for (size_t i = 0; i < num; ++i) {
+        const size_t     n = len ? len[i] : std::strlen (key[i]);
+        const value_type v = val ? val[i] : value_type (i);
+        update (key[i], n, v);
+      }
+      return 0;
+    }
+    // write one result per stored key into result[0 .. result_len); when the
+    // trie holds more keys than that, the problem is reported through _err ()
+    template <typename T>
+    void dump (T* result, const size_t result_len) {
+      npos_t from  = 0;
+      size_t depth = 0;
+      size_t count = 0;
+      union { int i; value_type x; } b;
+      b.i = begin (from, depth);
+      while (b.i != CEDAR_NO_PATH) {
+        if (count < result_len) {
+          _set_result (&result[count], b.x, depth, from);
+          ++count;
+        } else {
+          _err (__FILE__, __LINE__, "dump() needs array of length = num_keys()\n");
+        }
+        b.i = next (from, depth);
+      }
+    }
+    void shrink_tail () { // copy the tails still referenced from the trie into a fresh, compact buffer
+      union { char* tail; int* length; } t; // the first int of the new buffer doubles as its length counter
+      const size_t length_
+        = static_cast <size_t> (*_length)
+        - static_cast <size_t> (*_length0) * (1 + sizeof (value_type)); // current length minus the entries recorded in _tail0
+      t.tail = static_cast <char*> (std::malloc (length_));
+      if (! t.tail) _err (__FILE__, __LINE__, "memory allocation failed\n");
+      *t.length = static_cast <int> (sizeof (int)); // data starts right after the length counter
+      for (int to = 0; to < _size; ++to) {
+        node& n = _array[to];
+        if (n.check >= 0 && _array[n.check].base != to && n.base < 0) { // used node that refers to a tail
+          char* const tail (&t.tail[*t.length]), * const tail_ (&_tail[-n.base]); // destination / source
+          n.base = - *t.length; // repoint the node at the new location
+          int i = 0; do tail[i] = tail_[i]; while (tail[i++]); // copy the string including its terminator
+          *reinterpret_cast <value_type*> (&tail[i])
+            = *reinterpret_cast <const value_type*> (&tail_[i]); // copy the value stored after the terminator
+          *t.length += i + static_cast <int> (sizeof (value_type));
+        }
+      }
+      std::free (_tail);
+      _tail = t.tail;
+      _realloc_array (_tail,  *_length,  *_length); // *_length is read from the buffer just installed
+      _quota  = *_length;
+      _realloc_array (_tail0, 1);
+      _quota0 = 1;
+    }
+    // optionally compact the tail with shrink_tail (), then write the trie
+    // to file fn; returns whatever the two-argument save () returns
+    int save (const char* fn, const char* mode, const bool shrink) {
+      if (shrink) {
+        shrink_tail ();
+      }
+      const int status = save (fn, mode);
+      return status;
+    }
+    // write the trie to file fn: first the tail buffer, then the node array
+    // (with USE_FAST_LOAD the block heads and the ninfo / block tables also
+    // go to "<fn>.sbl"). returns 0 on success and -1 when a file cannot be
+    // opened or any write / close fails, so that a truncated image is never
+    // reported as saved
+    int save (const char* fn, const char* mode = "wb") const {
+      // _test ();
+      FILE* fp = std::fopen (fn, mode);
+      if (! fp) return -1;
+      const size_t ntail = static_cast <size_t> (*_length);
+      const size_t nnode = static_cast <size_t> (_size);
+      bool ok = std::fwrite (_tail,  sizeof (char), ntail, fp) == ntail
+             && std::fwrite (_array, sizeof (node), nnode, fp) == nnode;
+      if (std::fclose (fp) != 0) ok = false; // flushing buffered data can still fail here
+      if (! ok) return -1;
+#ifdef USE_FAST_LOAD
+      const char* const info
+        = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
+      fp = std::fopen (info, mode);
+      delete [] info; // resolve memory leak
+      if (! fp) return -1;
+      const size_t nblock = static_cast <size_t> (_size >> 8);
+      ok = std::fwrite (&_bheadF, sizeof (int), 1, fp) == 1
+        && std::fwrite (&_bheadC, sizeof (int), 1, fp) == 1
+        && std::fwrite (&_bheadO, sizeof (int), 1, fp) == 1
+        && std::fwrite (_ninfo, sizeof (ninfo), nnode,  fp) == nnode
+        && std::fwrite (_block, sizeof (block), nblock, fp) == nblock;
+      if (std::fclose (fp) != 0) ok = false;
+      if (! ok) return -1;
+#endif
+      return 0;
+    }
+    // load a trie image written by save () from file fn
+    //   offset: byte position in the file where the image starts
+    //   size_:  byte position where the image ends; 0 means the end of file
+    // returns 0 on success and -1 when the file cannot be opened or read or
+    // is too short; the file handle is closed on every return path
+    int open (const char* fn, const char* mode = "rb",
+              const size_t offset = 0, size_t size_ = 0) {
+      FILE* fp = std::fopen (fn, mode);
+      if (! fp) return -1;
+      // get size
+      if (! size_) {
+        if (std::fseek (fp, 0, SEEK_END) != 0) { std::fclose (fp); return -1; }
+        const long end = std::ftell (fp);
+        if (end < 0) { std::fclose (fp); return -1; } // ftell reports failure as -1
+        size_ = static_cast <size_t> (end);
+        if (std::fseek (fp, 0, SEEK_SET) != 0) { std::fclose (fp); return -1; }
+      }
+      if (size_ <= offset) { std::fclose (fp); return -1; }
+      if (std::fseek (fp, static_cast <long> (offset), SEEK_SET) != 0)
+        { std::fclose (fp); return -1; }
+      int len = 0;
+      if (std::fread (&len, sizeof (int), 1, fp) != 1) { std::fclose (fp); return -1; }
+      // the tail begins with its own length (the int just read), so a valid
+      // length can never be smaller than that int; this also rejects
+      // negative values before they are converted to size_t
+      if (len < static_cast <int> (sizeof (int))) { std::fclose (fp); return -1; }
+      const size_t length_ = static_cast <size_t> (len);
+      if (size_ <= offset + length_) { std::fclose (fp); return -1; }
+      // set array
+      clear (false);
+      size_ = (size_ - offset - length_) / sizeof (node); // from here on size_ is a node count
+      _array = static_cast <node*>  (std::malloc (sizeof (node)  * size_));
+      _tail  = static_cast <char*>  (std::malloc (length_));
+      _tail0 = static_cast <int*>   (std::malloc (sizeof (int)));
+#ifdef USE_FAST_LOAD
+      _ninfo = static_cast <ninfo*> (std::malloc (sizeof (ninfo) * size_));
+      _block = static_cast <block*> (std::malloc (sizeof (block) * size_));
+      if (! _array || ! _tail || ! _tail0 || ! _ninfo || ! _block)
+#else
+      if (! _array || ! _tail || ! _tail0)
+#endif
+        _err (__FILE__, __LINE__, "memory allocation failed\n");
+      // rewind to the start of the image: the length int is part of the tail
+      if (std::fseek (fp, static_cast <long> (offset), SEEK_SET) != 0)
+        { std::fclose (fp); return -1; }
+      if (length_ != std::fread (_tail,  sizeof (char), length_, fp) ||
+          size_   != std::fread (_array, sizeof (node), size_,   fp))
+        { std::fclose (fp); return -1; }
+      std::fclose (fp);
+      _size = static_cast <int> (size_);
+      *_length0 = 0;
+#ifdef USE_FAST_LOAD
+      const char* const info
+        = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
+      fp = std::fopen (info, mode);
+      delete [] info; // resolve memory leak
+      if (! fp) return -1;
+      if (std::fread (&_bheadF, sizeof (int), 1, fp) != 1 ||
+          std::fread (&_bheadC, sizeof (int), 1, fp) != 1 ||
+          std::fread (&_bheadO, sizeof (int), 1, fp) != 1 ||
+          size_      != std::fread (_ninfo, sizeof (ninfo), size_, fp) ||
+          size_ >> 8 != std::fread (_block, sizeof (block), size_ >> 8, fp))
+        { std::fclose (fp); return -1; }
+      std::fclose (fp);
+      _capacity = _size;
+      _quota  = *_length;
+      _quota0 = 1;
+#endif
+      return 0;
+    }
+#ifndef USE_FAST_LOAD
+    // Recreate whichever of the block table / node-info table is missing and
+    // reset the capacity and quota counters so the trie can be updated again.
+    void restore () {
+      if (_block == 0) _restore_block ();
+      if (_ninfo == 0) _restore_ninfo ();
+      _quota0   = 1;
+      _quota    = *_length;
+      _capacity = _size;
+    }
+#endif
+    // Attach the trie to an externally owned image at p instead of loading it
+    // from a file. The int stored at the start of p is read as the byte length
+    // of the tail region (through the _tail/_length union); the node array is
+    // taken to start that many bytes after p.
+    // A non-zero size_ is multiplied by unit_size (), reduced by that leading
+    // int, and then converted to a node count rounded up; size_ == 0 leaves
+    // _size at 0.
+    // Sets _no_delete so that clear () drops _array/_tail without freeing them.
+    void set_array (void* p, size_t size_ = 0) { // ad-hoc
+      clear (false);
+      if (size_)
+        size_ = size_ * unit_size () - static_cast <size_t> (*static_cast <int*> (p));
+      _tail  = static_cast <char*> (p);
+      _array = reinterpret_cast <node*> (_tail + *_length);
+      _size  = static_cast <int> (size_ / unit_size () + (size_ % unit_size () ? 1 : 0));
+      _no_delete = true;
+    }
+    // Read-only access to the raw node array (_array).
+    const void* array () const { return _array; }
+    // Release all buffers and zero the bookkeeping counters.
+    // _array and _tail are only freed when they are owned by this object
+    // (i.e. _no_delete is not set). When reuse is true the trie is put back
+    // into its freshly initialized state via _initialize ().
+    void clear (const bool reuse = true) {
+      if (! _no_delete) { // std::free accepts a null pointer
+        std::free (_array);
+        std::free (_tail);
+      }
+      std::free (_tail0);
+      std::free (_ninfo);
+      std::free (_block);
+      _array = 0;
+      _tail  = 0;
+      _tail0 = 0;
+      _ninfo = 0;
+      _block = 0;
+      _bheadF = _bheadC = _bheadO = 0;
+      _capacity = _size = 0;
+      _quota = _quota0 = 0;
+      if (reuse) _initialize ();
+      _no_delete = false;
+    }
+    // return the first child for a tree rooted by a given node
+    //   from : in/out position; a non-zero upper 32 bits is treated as an
+    //          offset into _tail, otherwise from indexes _array
+    //   len  : in/out; increased by the number of characters walked
+    // Follows the first-child links downwards and returns the int stored at
+    // the node reached, or the int stored after the NUL-terminated suffix in
+    // _tail. Returns CEDAR_NO_PATH when started at node 0 and no entry exists.
+    int begin (npos_t& from, size_t& len) {
+#ifndef USE_FAST_LOAD
+      if (! _ninfo) _restore_ninfo ();
+#endif
+      int base = from >> 32 ? - static_cast <int> (from >> 32) : _array[from].base;
+      if (base >= 0) { // on trie
+        uchar c = _ninfo[from].child;
+        if (! from && ! (c = _ninfo[base ^ c].sibling)) // bug fix
+          return CEDAR_NO_PATH; // no entry
+        for (; c && base >= 0; ++len) {
+          from = static_cast <size_t> (base) ^ c;
+          base = _array[from].base;
+          c    = _ninfo[from].child;
+        }
+        if (base >= 0) return _array[base ^ c].base;
+      }
+      // remaining suffix lives in _tail at offset -base
+      const size_t len_ = std::strlen (&_tail[-base]);
+      from &= TAIL_OFFSET_MASK;
+      from |= static_cast <npos_t> (static_cast <size_t> (-base) + len_) << 32;
+      len += len_;
+      return *reinterpret_cast <int*> (&_tail[-base] + len_ + 1);
+    }
+    // return the next child if any
+    //   from : in/out position previously produced by begin ()/next ()
+    //   len  : in/out length matching from; adjusted while climbing
+    //   root : node at which the climb towards the parent stops
+    // Climbs via check links until a node with a sibling is found, then
+    // descends from that sibling with begin (). Returns CEDAR_NO_PATH when no
+    // sibling is found before reaching root (or when root itself is on tail).
+    int next (npos_t& from, size_t& len, const npos_t root = 0) {
+      uchar c = 0;
+      if (const int offset = static_cast <int> (from >> 32)) { // on tail
+        if (root >> 32) return CEDAR_NO_PATH;
+        from &= TAIL_OFFSET_MASK;
+        len -= static_cast <size_t> (offset - (-_array[from].base));
+      } else
+        c    = _ninfo[_array[from].base ^ 0].sibling;
+      for (; ! c && from != root; --len) {
+        c    = _ninfo[from].sibling;
+        from = static_cast <size_t> (_array[from].check);
+      }
+      if (! c) return CEDAR_NO_PATH;
+      return begin (from = static_cast <size_t> (_array[from].base) ^ c, ++len);
+    }
+    // positions that _resolve () rewrites when the node they refer to is moved
+    npos_t tracking_node[NUM_TRACKING_NODES + 1];
+  private:
+    // currently disabled; implement these if you need
+    da (const da&);
+    da& operator= (const da&);
+    node*   _array;   // node array of the double-array trie
+    // the first int of each tail buffer is accessed through the _length alias
+    union { char* _tail;  int* _length;  };
+    union { int*  _tail0; int* _length0; };
+    ninfo*  _ninfo;   // per-node child/sibling labels
+    block*  _block;   // per-256-node block bookkeeping
+    int     _bheadF;  // first block of Full;   0
+    int     _bheadC;  // first block of Closed; 0 if no Closed
+    int     _bheadO;  // first block of Open;   0 if no Open
+    int     _capacity;
+    int     _size;
+    int     _quota;
+    int     _quota0;
+    int     _no_delete; // set by set_array (); makes clear () skip freeing _array/_tail
+    short   _reject[257];
+    //
+    static void _err (const char* fn, const int ln, const char* msg)
+    { std::fprintf (stderr, "cedar: %s [%d]: %s", fn, ln, msg); std::exit (1); }
+    // Resize the malloc'ed array p to size_n elements of T and set elements
+    // [size_p, size_n) to a value-initialized T. On allocation failure the old
+    // buffer is freed and the process is terminated through _err ().
+    template <typename T>
+    static void _realloc_array (T*& p, const int size_n, const int size_p = 0) {
+      void* tmp = std::realloc (p, sizeof (T) * static_cast <size_t> (size_n));
+      if (! tmp)
+        std::free (p), _err (__FILE__, __LINE__, "memory reallocation failed\n");
+      p = static_cast <T*> (tmp);
+      static const T T0 = T ();
+      for (T* q (p + size_p), * const r (p + size_n); q != r; ++q) *q = T0;
+    }
+    // Allocate the minimal buffers and set up the first 256-node block:
+    // node 0 becomes the root and nodes 1..255 are linked into a circular
+    // list through their negative base/check values.
+    void _initialize () { // initialize the first special block
+      _realloc_array (_array, 256, 256);
+      _realloc_array (_tail,  sizeof (int));
+      _realloc_array (_tail0, 1);
+      _realloc_array (_ninfo, 256);
+      _realloc_array (_block, 1);
+      _array[0] = node (0, -1);
+      for (int i = 1; i < 256; ++i)
+        _array[i] = node (i == 1 ? -255 : - (i - 1), i == 255 ? -1 : - (i + 1));
+      _capacity = _size = 256;
+      _block[0].ehead = 1; // bug fix for erase
+      // the tail starts out holding only its own length field
+      _quota  = *_length  = static_cast <int> (sizeof (int));
+      _quota0 = 1;
+      for (size_t i = 0 ; i <= NUM_TRACKING_NODES; ++i) tracking_node[i] = 0;
+      for (short  i = 0; i <= 256; ++i) _reject[i] = i + 1;
+    }
+    // follow/create edge
+    // Returns the index of the child of from reached by label. If the slot is
+    // unused (or from has no base yet) an empty node is claimed for it; if the
+    // slot belongs to another parent the conflict is handed to _resolve (),
+    // which may also update from. cf is passed through to _resolve ().
+    template <typename T>
+    int _follow (npos_t& from, const uchar& label, T& cf) {
+      int to = 0;
+      const int base = _array[from].base;
+      if (base < 0 || _array[to = base ^ label].check < 0) {
+        to = _pop_enode (base, label, static_cast <int> (from));
+        _push_sibling (from, to ^ label, label, base >= 0);
+      } else if (_array[to].check != static_cast <int> (from))
+        to = _resolve (from, base, label, cf);
+      return to;
+    }
+    // find key from double array
+    //   key  : bytes to match; key[pos..len) is consumed
+    //   from : in/out position; upper 32 bits non-zero means "inside _tail"
+    //   pos  : in/out index of the next unmatched byte of key
+    //   len  : number of bytes in key
+    // Returns the stored int on an exact match, CEDAR_NO_VALUE when key ends
+    // at a position that carries no value, and CEDAR_NO_PATH on a mismatch.
+    int _find (const char* key, npos_t& from, size_t& pos, const size_t len) const {
+      npos_t offset = from >> 32;
+      if (! offset) { // node on trie
+        for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
+             _array[from].base >= 0; ) {
+          if (pos == len) {
+            const node& n = _array[_array[from].base ^ 0];
+            if (n.check != static_cast <int> (from)) return CEDAR_NO_VALUE;
+            return n.base;
+          }
+          size_t to = static_cast <size_t> (_array[from].base); to ^= key_[pos];
+          if (_array[to].check != static_cast <int> (from)) return CEDAR_NO_PATH;
+          ++pos;
+          from = to;
+        }
+        offset = static_cast <npos_t> (-_array[from].base);
+      }
+      // switch to _tail to match suffix
+      const size_t pos_orig = pos; // start position in reading _tail
+      // biased so that tail[pos] lines up with key[pos]
+      const char* const tail = &_tail[offset] - pos;
+      if (pos < len) {
+        do if (key[pos] != tail[pos]) break; while (++pos < len);
+        if (const npos_t moved = pos - pos_orig) {
+          from &= TAIL_OFFSET_MASK;
+          from |= (offset + moved) << 32;
+        }
+        if (pos < len) return CEDAR_NO_PATH; // input > tail, input != tail
+      }
+      if (tail[pos]) return CEDAR_NO_VALUE;  // input < tail
+      return *reinterpret_cast <const int*> (&tail[len + 1]);
+    }
+#ifndef USE_FAST_LOAD
+    // Rebuild the child/sibling table (_ninfo) from the check links in _array:
+    // every used node with a non-zero label is registered with its parent.
+    void _restore_ninfo () {
+      _realloc_array (_ninfo, _size);
+      for (int to = 0; to < _size; ++to) {
+        const int from = _array[to].check;
+        if (from < 0) continue; // skip empty node
+        const int base = _array[from].base;
+        if (const uchar label = static_cast <uchar> (base ^ to)) // skip leaf
+          _push_sibling (static_cast <size_t> (from), base, label,
+                         ! from || _ninfo[from].child || _array[base ^ 0].check == from);
+      }
+    }
+    // Rebuild the block table (_block) from _array: count the empty nodes
+    // (check < 0) in every 256-node block, remember the first one as ehead and
+    // link the block into the Full (0 empty), Closed (1 empty) or Open list.
+    void _restore_block () {
+      _realloc_array (_block, _size >> 8);
+      _bheadF = _bheadC = _bheadO = 0;
+      for (int bi (0), e (0); e < _size; ++bi) { // register blocks to full
+        block& b = _block[bi];
+        b.num = 0;
+        for (; e < (bi << 8) + 256; ++e)
+          if (_array[e].check < 0 && ++b.num == 1) b.ehead = e;
+        int& head_out = b.num == 1 ? _bheadC : (b.num == 0 ? _bheadF : _bheadO);
+        _push_block (bi, head_out, ! head_out && b.num);
+      }
+    }
+#endif
+    // Store a lookup result into the caller's result object; which fields are
+    // filled in depends on the result type.
+    // value only
+    void _set_result (result_type* x, value_type r, size_t = 0, npos_t = 0) const {
+      *x = r;
+    }
+    // value and matched length
+    void _set_result (result_pair_type* x, value_type r, size_t l, npos_t = 0) const {
+      x->value  = r;
+      x->length = l;
+    }
+    // value, matched length and node position
+    void _set_result (result_triple_type* x, value_type r, size_t l, npos_t from) const {
+      x->value  = r;
+      x->length = l;
+      x->id     = from;
+    }
+    // Unlink block bi from the circular list whose head is head_in.
+    // last tells that bi is the only block in that list.
+    void _pop_block (const int bi, int& head_in, const bool last) {
+      if (last) { // last one popped; Closed or Open
+        head_in = 0;
+        return;
+      }
+      const block& blk = _block[bi];
+      _block[blk.prev].next = blk.next;
+      _block[blk.next].prev = blk.prev;
+      if (head_in == bi) head_in = blk.next;
+    }
+    // Link block bi into the circular list whose head is head_out and make it
+    // the new head. empty tells that the destination list has no block yet.
+    void _push_block (const int bi, int& head_out, const bool empty) {
+      block& b = _block[bi];
+      if (empty) { // the destination is empty
+        head_out = b.prev = b.next = bi;
+      } else { // use most recently pushed
+        // tail_out aliases the old head's prev link, i.e. the current tail
+        int& tail_out = _block[head_out].prev;
+        b.prev = tail_out;
+        b.next = head_out;
+        head_out = tail_out = _block[tail_out].next = bi;
+      }
+    }
+    // Append a fresh block of 256 empty nodes, growing the arrays first when
+    // the capacity is exhausted, and register it in the Open list.
+    // Returns the index of the new block.
+    int _add_block () {
+      if (_size == _capacity) { // allocate memory if needed
+#ifdef USE_EXACT_FIT
+        _capacity += _size >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : _size;
+#else
+        _capacity += _capacity;
+#endif
+        _realloc_array (_array, _capacity, _capacity);
+        _realloc_array (_ninfo, _capacity, _size);
+        _realloc_array (_block, _capacity >> 8, _size >> 8);
+      }
+      _block[_size >> 8].ehead = _size;
+      // chain the 256 new nodes into a circular list of empty nodes
+      _array[_size] = node (- (_size + 255),  - (_size + 1));
+      for (int i = _size + 1; i < _size + 255; ++i)
+        _array[i] = node (-(i - 1), -(i + 1));
+      _array[_size + 255] = node (- (_size + 254),  -_size);
+      _push_block (_size >> 8, _bheadO, ! _bheadO); // append to block Open
+      _size += 256;
+      return (_size >> 8) - 1;
+    }
+    // Move block bi from the list headed by head_in to the list headed by
+    // head_out.
+    void _transfer_block (const int bi, int& head_in, int& head_out) {
+      const bool only_one = (_block[bi].next == bi); // bi is alone in its list
+      _pop_block (bi, head_in, only_one);
+      const bool dest_empty = head_out == 0 && _block[bi].num != 0;
+      _push_block (bi, head_out, dest_empty);
+    }
+    // pop empty node from block; never transfer the special block (bi = 0)
+    // Claims the node at base ^ label (or a freshly found place when base is
+    // negative) for parent from, updates the block's empty-node count and
+    // list membership, and returns the index of the claimed node.
+    int _pop_enode (const int base, const uchar label, const int from) {
+      const int e  = base < 0 ? _find_place () : base ^ label;
+      const int bi = e >> 8;
+      node&  n = _array[e];
+      block& b = _block[bi];
+      if (--b.num == 0) {
+        if (bi) _transfer_block (bi, _bheadC, _bheadF); // Closed to Full
+      } else { // release empty node from empty ring
+        _array[-n.base].check = n.check;
+        _array[-n.check].base = n.base;
+        if (e == b.ehead) b.ehead = -n.check; // set ehead
+        if (bi && b.num == 1 && b.trial != MAX_TRIAL) // Open to Closed
+          _transfer_block (bi, _bheadO, _bheadC);
+      }
+      // initialize the released node
+      if (label) n.base = -1; else n.value = value_type (0);
+      n.check = from;
+      if (base < 0) _array[from].base = e ^ label;
+      return e;
+    }
+    // push empty node into empty ring
+    // Returns node e to its block's circular list of empty nodes, moves the
+    // block between the Full/Closed/Open lists as its empty count changes, and
+    // clears the node's child/sibling info.
+    void _push_enode (const int e) {
+      const int bi = e >> 8;
+      block& b = _block[bi];
+      if (++b.num == 1) { // Full to Closed
+        b.ehead = e;
+        _array[e] = node (-e, -e);
+        if (bi) _transfer_block (bi, _bheadF, _bheadC); // Full to Closed
+      } else {
+        const int prev = b.ehead;
+        const int next = -_array[prev].check;
+        _array[e] = node (-prev, -next);
+        _array[prev].check = _array[next].base = -e;
+        if (b.num == 2 || b.trial == MAX_TRIAL) { // Closed to Open
+          if (bi) _transfer_block (bi, _bheadC, _bheadO);
+        }
+        b.trial = 0;
+      }
+      if (b.reject < _reject[b.num]) b.reject = _reject[b.num];
+      _ninfo[e] = ninfo (); // reset ninfo; no child, no sibling
+    }
+    // push label to from's child
+    // Inserts label into the child/sibling chain of from (children live at
+    // base ^ label). With ORDERED the chain is kept sorted by label; otherwise
+    // the label is placed right after a leading 0 label when one is present.
+    // flag == false inserts at the head without walking the chain.
+    void _push_sibling (const npos_t from, const int base, const uchar label, const bool flag = true) {
+      uchar* c = &_ninfo[from].child;
+      if (flag && (ORDERED ? label > *c : ! *c))
+        do c = &_ninfo[base ^ *c].sibling; while (ORDERED && *c && *c < label);
+      _ninfo[base ^ label].sibling = *c, *c = label;
+    }
+    // Remove label from the child/sibling chain of from; the chain is walked
+    // until the link holding label is found, which is then bypassed.
+    void _pop_sibling (const npos_t from, const int base, const uchar label) {
+      uchar* link = &_ninfo[from].child;
+      for (; *link != label; link = &_ninfo[base ^ *link].sibling) {}
+      *link = _ninfo[base ^ label].sibling;
+    }
+    // check whether to replace branching w/ the newly added node
+    // Walks the sibling chains under base_n and base_p in lock-step. Returns
+    // false as soon as the base_p chain runs out, and true when the base_n
+    // chain runs out first.
+    bool _consult (const int base_n, const int base_p, uchar c_n, uchar c_p) const {
+      do if (! (c_p = _ninfo[base_p ^ c_p].sibling)) return false;
+      while ((c_n = _ninfo[base_n ^ c_n].sibling));
+      return true;
+    }
+    // enumerate (equal to or more than one) child nodes
+    // Writes the labels of the sibling chain starting at c (children at
+    // base ^ label) into the buffer at p, inserting label as well when it is
+    // not -1 (in sorted position if ORDERED). Returns a pointer to the last
+    // label written.
+    uchar* _set_child (uchar* p, const int base, uchar c, const int label = -1) {
+      --p;
+      if (! c)  { *++p = c; c = _ninfo[base ^ c].sibling; } // 0: terminal
+      if (ORDERED)
+        while (c && c < label) { *++p = c; c = _ninfo[base ^ c].sibling; }
+      if (label != -1) *++p = static_cast <uchar> (label);
+      while (c) { *++p = c; c = _ninfo[base ^ c].sibling; }
+      return p;
+    }
+    // Pick a place for a single node: the first empty node of the first Closed
+    // block, else of the first Open block, else the start of a new block.
+    int _find_place () {
+      const int bi = _bheadC ? _bheadC : _bheadO;
+      if (bi) return _block[bi].ehead;
+      return _add_block () << 8;
+    }
+    // Pick a base position for the set of labels [first, last]: scan the Open
+    // blocks for an empty node e such that every slot (e ^ *first) ^ label is
+    // empty too. Blocks that fail are marked via reject/trial and moved to
+    // Closed after MAX_TRIAL failures. Falls back to adding a new block.
+    int _find_place (const uchar* const first, const uchar* const last) {
+      if (int bi = _bheadO) {
+        const int   bz = _block[_bheadO].prev;
+        const short nc = static_cast <short> (last - first + 1);
+        while (1) { // set candidate block
+          block& b = _block[bi];
+          if (b.num >= nc && nc < b.reject) // explore configuration
+            for (int e = b.ehead;;) {
+              const int base = e ^ *first;
+              for (const uchar* p = first; _array[base ^ *++p].check < 0; )
+                if (p == last) return b.ehead = e; // no conflict
+              if ((e = -_array[e].check) == b.ehead) break;
+            }
+          b.reject = nc;
+          if (b.reject < _reject[b.num]) _reject[b.num] = b.reject;
+          const int bi_ = b.next; // saved before the block may be transferred
+          if (++b.trial == MAX_TRIAL) _transfer_block (bi, _bheadO, _bheadC);
+          if (bi == bz) break;
+          bi = bi_;
+        }
+      }
+      return _add_block () << 8;
+    }
+    // resolve conflict on base_n ^ label_n = base_p ^ label_p
+    // The slot wanted for the new child (label_n under from_n) is occupied by
+    // a child of another parent. Depending on _consult (), either the children
+    // of from_n (plus the newcomer) or the children of the other parent are
+    // relocated to a free place. cf (old_index, new_index) is invoked for every
+    // moved node, from_n is updated if the parent itself moves, and
+    // tracking_node entries are rewritten. Returns the index of the new child.
+    template <typename T>
+    int _resolve (npos_t& from_n, const int base_n, const uchar label_n, T& cf) {
+      // examine siblings of conflicted nodes
+      const int to_pn  = base_n ^ label_n;
+      const int from_p = _array[to_pn].check;
+      const int base_p = _array[from_p].base;
+      const bool flag // whether to replace siblings of newly added
+        = _consult (base_n, base_p, _ninfo[from_n].child, _ninfo[from_p].child);
+      uchar child[256];
+      uchar* const first = &child[0];
+      uchar* const last  =
+        flag ? _set_child (first, base_n, _ninfo[from_n].child, label_n)
+        : _set_child (first, base_p, _ninfo[from_p].child);
+      const int base =
+        (first == last ? _find_place () : _find_place (first, last)) ^ *first;
+      // replace & modify empty list
+      const int from  = flag ? static_cast <int> (from_n) : from_p;
+      const int base_ = flag ? base_n : base_p;
+      if (flag && *first == label_n) _ninfo[from].child = label_n; // new child
+      _array[from].base = base; // new base
+      for (const uchar* p = first; p <= last; ++p) { // to_ => to
+        const int to  = _pop_enode (base, *p, from);
+        const int to_ = base_ ^ *p;
+        _ninfo[to].sibling = (p == last ? 0 : *(p + 1));
+        if (flag && to_ == to_pn) continue; // skip newcomer (no child)
+        cf (to_, to);
+        node& n  = _array[to];
+        node& n_ = _array[to_];
+        if ((n.base = n_.base) > 0 && *p) { // copy base; bug fix
+          uchar c = _ninfo[to].child = _ninfo[to_].child;
+          do _array[n.base ^ c].check = to; // adjust grand son's check
+          while ((c = _ninfo[n.base ^ c].sibling));
+        }
+        if (! flag && to_ == static_cast <int> (from_n)) // parent node moved
+          from_n = static_cast <size_t> (to); // bug fix
+        if (! flag && to_ == to_pn) { // the address is immediately used
+          _push_sibling (from_n, to_pn ^ label_n, label_n);
+          _ninfo[to_].child = 0; // remember to reset child
+          if (label_n) n_.base = -1; else n_.value = value_type (0);
+          n_.check = static_cast <int> (from_n);
+        } else
+          _push_enode (to_);
+        if (NUM_TRACKING_NODES) // keep the traversed node updated
+          for (size_t j = 0; tracking_node[j] != 0; ++j) {
+            if (static_cast <int> (tracking_node[j] & TAIL_OFFSET_MASK) == to_) {
+              tracking_node[j] &= NODE_INDEX_MASK;
+              tracking_node[j] |= static_cast <npos_t> (to);
+            }
+          }
+      }
+      return flag ? base ^ label_n : to_pn;
+    }
+    // test the validity of double array for debug
+    // Recursively walks the subtree under from and asserts that every child's
+    // check points back to its parent and that tail offsets lie within the
+    // recorded tail length. The checks vanish when assert is compiled out.
+    void _test (const npos_t from = 0) const {
+      const int base = _array[from].base;
+      if (base < 0) { // validate tail offset
+        assert (*_length >= static_cast <int> (-base + 1 + sizeof (value_type)));
+        return;
+      }
+      uchar c = _ninfo[from].child;
+      do {
+        if (from) assert (_array[base ^ c].check == static_cast <int> (from));
+        if (c) _test (static_cast <npos_t> (base ^ c));
+      } while ((c = _ninfo[base ^ c].sibling));
+    }
+  };
+}
+#endif
--- a/libchinese-segmentation/storage-base/darts-clone/darts.h
+++ b/libchinese-segmentation/storage-base/darts-clone/darts.h
--- a/libchinese-segmentation/storage-base/storage-base-cedar.pri
+++ b/libchinese-segmentation/storage-base/storage-base-cedar.pri
@ -0,0 +1,12 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/darts-clone/darts.h \
+    $$PWD/cedar/cedarpp.h \
+    $$PWD/cedar/cedar.h \
+    $$PWD/storage-base.h \
+    $$PWD/storage-base.hpp
+
+SOURCES += \
+    $$PWD/storage-base.cpp
+
--- a/libchinese-segmentation/storage-base/storage-base.cpp
+++ b/libchinese-segmentation/storage-base/storage-base.cpp
@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef STORAGEBASE_CPP
+#define STORAGEBASE_CPP
+
+#include "storage-base.h"
+
+// Remember the dictionary source files and the cache location, and create an
+// empty trie. Nothing is loaded until Init() is called.
+template<const bool ordered, typename cache_file_header>
+StorageBase<ordered, cache_file_header>::StorageBase(const vector<string> file_paths, string dat_cache_path)
+    : m_file_paths(file_paths)
+    , m_dat_cache_path(dat_cache_path)
+    , m_double_array_data_trie(new cedar::da<int, -1, -2, ordered>)
+{
+    // The cache header type must extend the common header layout.
+    static_assert(std::is_base_of<CacheFileHeaderBase, header_type>::value, "CacheFileHeader class not derived from CacheFileHeaderBase!");
+}
+
+// Attach to the cached trie file for the configured dictionaries, building the
+// cache through LoadSourceFile() first when no matching cache can be attached.
+template<const bool ordered, typename cache_file_header>
+void StorageBase<ordered, cache_file_header>::Init()
+{
+    int total_bytes = 0;
+    const string digest = CalcFileListMD5(m_file_paths, total_bytes);
+    m_total_dict_size = total_bytes;
+
+    // Without an explicit cache location the data file defaults to the tmp
+    // directory, named after the digest of the dictionary files.
+    // NOTE(review): a predictable name under /tmp may be worth revisiting.
+    if (m_dat_cache_path.empty()) {
+        m_dat_cache_path = "/tmp/" + digest + ".dat_";
+    }
+    m_dat_cache_path += VERSION;
+
+    // An existing cache that matches the digest is used as is.
+    if (!InitAttachDat(m_dat_cache_path, digest)) {
+        // Build the double-array trie and write it to the dat file ...
+        LoadSourceFile(m_dat_cache_path, digest);
+
+        // ... then attach to what was just written.
+        bool build_ret = InitAttachDat(m_dat_cache_path, digest);
+
+        assert(build_ret);
+    }
+}
+
+// Look up key in the trie and return the NUL-terminated element string stored
+// for it, or an empty string when the key has no entry (or nothing has been
+// attached yet).
+template<const bool ordered, typename cache_file_header>
+string StorageBase<ordered, cache_file_header>::Find(const string &key)
+{
+    // The trie type depends on the template parameter `ordered`, so the member
+    // template has to be named with the `template` disambiguator; without it
+    // `<` is parsed as the less-than operator.
+    const int result = m_double_array_data_trie->template exactMatchSearch<int>(key.c_str(), key.size());
+    // Negative results are the trie's "no value"/"no path" codes.
+    if (result < 0 || m_elements_ptr == nullptr)
+        return string();
+    // result is the byte offset of the element inside the mapped element area.
+    return string(&m_elements_ptr[result]);
+}
+
+// True when Find() yields a non-empty element for word. A key whose stored
+// element is the empty string is therefore reported as absent.
+template<const bool ordered, typename cache_file_header>
+bool StorageBase<ordered, cache_file_header>::Contains(string &word)
+{
+    return !this->Find(word).empty();
+}
+
+// Returns true when the element stored for word contains no ',' — which
+// includes the case where word is not found at all.
+// NOTE(review): if ',' separates alternative readings in the dictionary data,
+// this result looks inverted for a function named IsMultiTone — confirm
+// against the data format and the callers before relying on it.
+template<const bool ordered, typename cache_file_header>
+bool StorageBase<ordered, cache_file_header>::IsMultiTone(const string &word)
+{
+    const string element = this->Find(word);
+    return element.find(",") == string::npos;
+}
+
+// Summed size of the dictionary source files as computed by Init().
+template<const bool ordered, typename cache_file_header>
+int StorageBase<ordered, cache_file_header>::GetTotalDictSize() const
+{
+    return m_total_dict_size;
+}
+
+// Release the cache mapping, its file descriptor and the trie object.
+template<const bool ordered, typename cache_file_header>
+StorageBase<ordered, cache_file_header>::~StorageBase()
+{
+    // Only tear down what was actually set up: the object may be destroyed
+    // without Init() having run, or after a failed attach that left
+    // m_mmap_addr at nullptr/MAP_FAILED and m_mmap_fd at -1.
+    if (m_mmap_addr != nullptr && m_mmap_addr != MAP_FAILED) {
+        munmap(m_mmap_addr, m_mmap_length);
+    }
+    m_mmap_addr = nullptr;
+    if (m_mmap_fd >= 0) {
+        close(m_mmap_fd);
+    }
+    m_mmap_fd = -1;
+
+    // delete on a null pointer is a no-op, so no guard is needed.
+    delete m_double_array_data_trie;
+    m_double_array_data_trie = nullptr;
+}
+
+// Direct access to the underlying trie object; ownership stays with this class.
+template<const bool ordered, typename cache_file_header>
+cedar::da<int, -1, -2, ordered> *StorageBase<ordered, cache_file_header>::GetDoubleArrayDataTrie()
+{
+    return m_double_array_data_trie;
+}
+
+// Forwards to the trie's array(): the raw node array of the double array.
+template<const bool ordered, typename cache_file_header>
+const void *StorageBase<ordered, cache_file_header>::GetDataTrieArray()
+{
+    return m_double_array_data_trie->array();
+}
+
+// Forwards to the trie's size().
+template<const bool ordered, typename cache_file_header>
+int StorageBase<ordered, cache_file_header>::GetDataTrieSize()
+{
+    return m_double_array_data_trie->size();
+}
+
+// Forwards to the trie's total_size().
+template<const bool ordered, typename cache_file_header>
+int StorageBase<ordered, cache_file_header>::GetDataTrieTotalSize()
+{
+    return m_double_array_data_trie->total_size();
+}
+
+// Reinterprets the start of the mapped cache file as its header.
+// The pointer is only meaningful after a successful attach; before that
+// m_mmap_addr does not refer to a mapping.
+template<const bool ordered, typename cache_file_header>
+cache_file_header *StorageBase<ordered, cache_file_header>::GetCacheFileHeaderPtr()
+{
+    return reinterpret_cast<header_type*>(m_mmap_addr);
+}
+
+
+// Map the cache file read-only and point the trie at the data inside it.
+//   dat_cache_file : path of the cache file
+//   md5            : digest the cache header must carry to be accepted
+// The file is expected to hold a header_type, followed by
+// header.elements_size bytes of element data, followed by the trie image.
+// Returns true when the file was attached. On any failure every resource
+// acquired here is released, m_mmap_fd is reset to -1 and m_mmap_addr to
+// nullptr, and false is returned.
+template<const bool ordered, typename cache_file_header>
+bool StorageBase<ordered, cache_file_header>::InitAttachDat(const string &dat_cache_file, const string &md5)
+{
+    m_mmap_fd = open(dat_cache_file.c_str(), O_RDONLY);
+    if (m_mmap_fd < 0) {
+        return false;
+    }
+
+    // Single cleanup path shared by all failures below; `mapped` records
+    // whether there is a mapping of ours to undo.
+    bool mapped = false;
+    const auto fail = [this, &mapped]() -> bool {
+        if (mapped) {
+            munmap(m_mmap_addr, m_mmap_length);
+        }
+        m_mmap_addr = nullptr; // never leave MAP_FAILED behind
+        close(m_mmap_fd);
+        m_mmap_fd = -1;
+        return false;
+    };
+
+    const auto seek_off = lseek(m_mmap_fd, 0, SEEK_END);
+    if (seek_off < 0) {
+        return fail();
+    }
+
+    m_mmap_length = seek_off;
+    // Reject sizes that do not survive the conversion to m_mmap_length and
+    // files too short to hold a header, before mapping anything.
+    if (m_mmap_length != seek_off || static_cast<size_t>(seek_off) < sizeof(header_type)) {
+        return fail();
+    }
+
+    m_mmap_addr = reinterpret_cast<char *>(mmap(nullptr, m_mmap_length, PROT_READ, MAP_SHARED, m_mmap_fd, 0));
+    if (m_mmap_addr == MAP_FAILED) {
+        return fail();
+    }
+    mapped = true;
+
+    const header_type &header = *reinterpret_cast<header_type*>(m_mmap_addr);
+    const size_t expected_length = sizeof(header_type) + header.elements_size
+            + static_cast<size_t>(header.dat_size) * m_double_array_data_trie->unit_size();
+
+    // The cache is only valid for the dictionaries it was built from and must
+    // have exactly the size its header announces. A digest longer than the
+    // header field can never match and must not be compared past the field.
+    if (md5.size() > sizeof(header.md5_hex)
+            || 0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())
+            || static_cast<size_t>(m_mmap_length) != expected_length) {
+        return fail();
+    }
+
+    m_elements_ptr = m_mmap_addr + sizeof(header_type);
+    char *dat_ptr = m_mmap_addr + sizeof(header_type) + header.elements_size;
+    this->m_double_array_data_trie->set_array(dat_ptr, header.dat_size);
+    return true;
+}
+
+// Compute one MD5 digest over the contents of all files in files_list, in
+// list order, and return it as a hex string.
+//   file_size_sum : out; summed byte size of the files that were hashed
+// Files that cannot be opened or mapped, and empty files, are skipped.
+string CalcFileListMD5(const vector<string> &files_list, int &file_size_sum) {
+    limonp::MD5 md5;
+    file_size_sum = 0;
+
+    for (auto const & local_path : files_list) {
+        const int fd = open(local_path.c_str(), O_RDONLY);
+        if (fd < 0){
+            continue;
+        }
+        auto const len = lseek(fd, 0, SEEK_END);
+        if (len > 0) {
+            void * addr = mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
+            // Handle a failed mapping at run time: an assert disappears under
+            // NDEBUG and MAP_FAILED would then be hashed as if it were data.
+            if (MAP_FAILED != addr) {
+                md5.Update(static_cast<unsigned char *>(addr), len);
+                file_size_sum += static_cast<int>(len);
+
+                munmap(addr, len);
+            }
+        }
+        close(fd);
+    }
+
+    md5.Final();
+    return string(md5.digestChars);
+}
+#endif
--- a/libchinese-segmentation/storage-base/storage-base.h
+++ b/libchinese-segmentation/storage-base/storage-base.h
@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef STORAGEBASE_H
+#define STORAGEBASE_H
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include "Md5.hpp"
+#include "StringUtil.hpp"
+#include "cedar.h"
+using namespace std;
+
+// Fixed-layout header at the start of a cache file. Custom headers used with
+// StorageBase must derive from this struct.
+struct CacheFileHeaderBase { // TODO: byte alignment
+    char     md5_hex[32] = {};   // digest of the dictionary files the cache was built from
+    uint32_t elements_num = 0;
+    uint32_t elements_size = 0;  // byte size of the element area following the header
+    uint32_t dat_size = 0;       // size of the trie image, in trie units
+};
+
+// Dictionary storage backed by a cedar double-array trie that is built once
+// from a list of source files, cached in a file and attached through mmap.
+//   ordered           : forwarded to cedar::da as its ordering parameter
+//   cache_file_header : header type of the cache file; must derive from
+//                       CacheFileHeaderBase
+// Subclasses implement LoadSourceFile() to build and write the cache file.
+template<const bool ordered = false, typename cache_file_header = CacheFileHeaderBase>
+class StorageBase
+{
+public:
+    typedef cache_file_header header_type;
+
+    // file_paths: dictionary source files; dat_cache_path: cache file location
+    // (when empty, Init() derives a default one).
+    StorageBase(const vector<string> file_paths, string dat_cache_path = "");
+
+    // Attach to the cache file, building it first when necessary.
+    virtual void Init();
+
+    // Element string stored for key; empty when key has no entry.
+    virtual string Find(const string &key);
+
+    // True when Find(word) returns a non-empty string.
+    virtual bool Contains(string &word);
+
+    // True when the element stored for word contains no ','.
+    virtual bool IsMultiTone(const string &word);
+
+    // Summed size of the dictionary source files, as computed by Init().
+    virtual int GetTotalDictSize() const;
+
+    // Build the trie from the source files and write it to dat_cache_file.
+    virtual void LoadSourceFile(const string &dat_cache_file, const string &md5) = 0;
+
+    virtual ~StorageBase();
+
+    cedar::da<int, -1, -2, ordered> * GetDoubleArrayDataTrie();
+    const void * GetDataTrieArray();
+    int GetDataTrieSize();
+    int GetDataTrieTotalSize();
+
+    // Start of the mapped cache file viewed as its header.
+    cache_file_header * GetCacheFileHeaderPtr();
+
+private:
+    // Declared private; no definitions are visible here — presumably left
+    // unimplemented on purpose to forbid default construction and copying.
+    StorageBase();
+    StorageBase(const StorageBase&);
+    StorageBase& operator = (const StorageBase&);
+
+    // Map the cache file and point the trie at it; false if it cannot be used.
+    bool InitAttachDat(const string &dat_cache_file, const string &md5);
+
+    vector<string> m_file_paths;      // dictionary source files
+    string m_dat_cache_path;          // cache file path
+
+    cedar::da<int, -1, -2, ordered> * m_double_array_data_trie = nullptr;
+
+    const char * m_elements_ptr = nullptr; // element area inside the mapping
+
+    int    m_mmap_fd = -1;            // descriptor of the mapped cache file
+    int    m_mmap_length = 0;         // length of the mapping in bytes
+    char * m_mmap_addr = nullptr;     // start of the mapping
+
+    int    m_total_dict_size = 0;
+
+};
+
+// MD5 over the contents of all listed files; file_size_sum receives their summed size.
+inline string CalcFileListMD5(const vector<string> &files_list, int & file_size_sum);
+#include "storage-base.cpp"
+#endif // STORAGEBASE_H
--- a/libchinese-segmentation/storage-base/storage-base.hpp
+++ b/libchinese-segmentation/storage-base/storage-base.hpp
@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef STORAGEBASE_H
+#define STORAGEBASE_H
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include "Md5.hpp"
+#include "StringUtil.hpp"
+//#define USE_DARTS
+#ifdef USE_DARTS
+#include "../storage-base/darts-clone/darts.h"
+#include <cassert>
+#else
+#include "../storage-base/cedar/cedar.h"
+#endif
+using namespace std;
+
+/**
+ * Computes one MD5 over the concatenated contents of the given files.
+ *
+ * Files are processed in list order. A file that cannot be opened, is empty,
+ * or cannot be mapped is skipped and contributes nothing to the digest.
+ *
+ * @param files_list     paths of the files to hash.
+ * @param file_size_sum  output: reset to 0, then increased by the byte length
+ *                       of every file that was actually hashed.
+ * @return the digest as produced by limonp::MD5::digestChars.
+ */
+inline string CalcFileListMD5(const vector<string> &files_list, int & file_size_sum)
+{
+    limonp::MD5 md5;
+    file_size_sum = 0;
+
+    for (auto const & local_path : files_list) {
+        const int fd = open(local_path.c_str(), O_RDONLY);
+        if (fd < 0) {
+            continue;
+        }
+        auto const len = lseek(fd, 0, SEEK_END);
+        if (len > 0) {
+            void * addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+            // A failed mapping used to be caught only by assert(), which is
+            // compiled out under NDEBUG and would then hash through MAP_FAILED.
+            // Treat it like an unreadable file instead.
+            if (MAP_FAILED != addr) {
+                md5.Update(static_cast<unsigned char *>(addr), len);
+                file_size_sum += len;
+
+                munmap(addr, len);
+            }
+        }
+        close(fd);
+    }
+
+    md5.Final();
+    return string(md5.digestChars);
+}
+
+struct CacheFileHeaderBase { // Header record at offset 0 of the dat cache file; StorageBase::InitAttachDat reads it straight from the mapping. TODO: byte alignment
+    char     md5_hex[32] = {}; // compared by memcmp over md5.size() bytes with the MD5 string of the source files, so no terminating NUL is needed
+    uint32_t elements_num = 0; // presumably the number of records in the element area - not read in this header, confirm against the writer
+    uint32_t elements_size = 0; // byte size of the element area that directly follows the header
+    uint32_t dat_size = 0; // trie size in units: InitAttachDat multiplies it by unit_size() to get bytes
+};
+
+/**
+ * Base class for a dictionary whose double-array trie is kept in a "dat cache"
+ * file and attached read-only through mmap.
+ *
+ * Cache file layout, as validated by InitAttachDat():
+ *   [header_type][element area: header.elements_size bytes][trie: header.dat_size units]
+ *
+ * Template parameters:
+ *   element_ptr_type  - type the element area is viewed as (see GetElementPtr()).
+ *   ordered           - forwarded to cedar::da; unused when USE_DARTS is defined.
+ *   cache_file_header - header record type; must derive from CacheFileHeaderBase.
+ *
+ * Subclasses implement LoadSourceFile() to build the trie and write the dat file.
+ */
+template<typename element_ptr_type, const bool ordered = false, typename cache_file_header = CacheFileHeaderBase>
+class StorageBase
+{
+public:
+    typedef cache_file_header header_type;
+#ifdef USE_DARTS
+    typedef typename Darts::DoubleArray::result_pair_type result_pair_type;
+#else
+    typedef typename cedar::da<int, -1, -2, ordered>::result_pair_type result_pair_type;
+#endif
+
+    /**
+     * @param file_paths      source dictionary files; Init() hashes them.
+     * @param dat_cache_path  cache file path prefix; when empty, Init() uses /tmp.
+     *
+     * The trie member is default-constructed for both back ends. The former
+     * USE_DARTS variant initialised the by-value Darts::DoubleArray member from
+     * `new Darts::DoubleArray`, which does not match the member's type.
+     */
+    StorageBase(const vector<string> file_paths, string dat_cache_path = "")
+        : m_file_paths(file_paths), m_dat_cache_path(dat_cache_path)
+    {
+        static_assert(std::is_base_of<CacheFileHeaderBase, header_type>::value, "CacheFileHeader class not derived from CacheFileHeaderBase!");
+    }
+
+    /**
+     * Attaches the dat cache file, building it first when it is missing or stale.
+     *
+     * NOTE(review): written for a single call per object - every call appends
+     * VERSION to m_dat_cache_path again.
+     * NOTE(security): the fallback path under /tmp is predictable and the file is
+     * trusted once its header matches; consider a per-user cache directory.
+     */
+    virtual void Init()
+    {
+        int file_size_sum = 0;
+        const string md5 = CalcFileListMD5(m_file_paths, file_size_sum);
+        m_total_dict_size = file_size_sum;
+
+        if (m_dat_cache_path.empty()) {
+            // No cache location was given: store the dat file under /tmp by default.
+            m_dat_cache_path = "/tmp/" + md5 + ".dat_";
+        }
+        m_dat_cache_path += VERSION;
+        if (InitAttachDat(m_dat_cache_path, md5)) {
+            return;
+        }
+
+        // Build the double-array trie and write it to the dat file.
+        LoadSourceFile(m_dat_cache_path, md5);
+
+        const bool build_ret = InitAttachDat(m_dat_cache_path, md5);
+
+        assert(build_ret);
+        (void)build_ret; // keeps NDEBUG builds free of an unused-variable warning
+    }
+
+    /** Builds the cache file dat_cache_file from the source files; md5 is their digest. */
+    virtual void LoadSourceFile(const string &dat_cache_file, const string &md5) = 0;
+
+    /** Unmaps the cache file and closes its descriptor, if one is attached. */
+    virtual ~StorageBase()
+    {
+        ReleaseMapping();
+    }
+#ifndef USE_DARTS
+    /** Forwards to cedar::da::update(). */
+    inline int Update(const char* key, size_t len, int val)
+    {
+        return m_double_array_data_trie.update(key, len, val);
+    }
+#endif
+    /** Forwards to the trie's commonPrefixSearch(); returns what that call returns. */
+    inline size_t CommonPrefixSearch(const char* key, result_pair_type* result, size_t result_len) const
+    {
+        return m_double_array_data_trie.commonPrefixSearch(key, result, result_len);
+    }
+
+    /** Forwards to the trie's exactMatchSearch<int>() for the first len bytes of key. */
+    inline int ExactMatchSearch(const char* key, size_t len) const
+    {
+        return m_double_array_data_trie.template exactMatchSearch<int>(key, len);
+    }
+
+    /** Returns the trie's array() pointer. */
+    inline const void * GetDataTrieArray()
+    {
+        return m_double_array_data_trie.array();
+    }
+
+    /** Returns the trie's size(). */
+    inline int GetDataTrieSize()
+    {
+        return m_double_array_data_trie.size();
+    }
+
+    /** Returns the trie's total_size(). */
+    inline int GetDataTrieTotalSize()
+    {
+        return m_double_array_data_trie.total_size();
+    }
+
+    /** Returns the mapped header, or nullptr while no cache file is attached. */
+    inline cache_file_header * GetCacheFileHeaderPtr() const
+    {
+        return reinterpret_cast<header_type*>(m_mmap_addr);
+    }
+
+    /** Returns the start of the mapped element area, or nullptr while nothing is attached. */
+    inline const element_ptr_type * GetElementPtr() const
+    {
+        return m_elements_ptr;
+    }
+
+private:
+    StorageBase() = delete;
+    StorageBase(const StorageBase&) = delete;
+    StorageBase& operator = (const StorageBase&) = delete;
+
+    /**
+     * Maps dat_cache_file read-only and points the trie at its trie section.
+     *
+     * The file is accepted only when it is at least one header long, its md5_hex
+     * field starts with md5, and its length equals header + elements + trie.
+     * The members are updated only on success, so a failed attempt leaves the
+     * object exactly as it was (never holding MAP_FAILED or a stale length).
+     *
+     * @return true when the file was attached, false otherwise.
+     */
+    bool InitAttachDat(const string &dat_cache_file, const string &md5)
+    {
+        const int fd = open(dat_cache_file.c_str(), O_RDONLY);
+        if (fd < 0) {
+            return false;
+        }
+
+        // Check the length before mapping: a file too short for a header is
+        // rejected here, which also keeps a zero length away from mmap.
+        const auto seek_off = lseek(fd, 0, SEEK_END);
+        if (seek_off < 0 or static_cast<size_t>(seek_off) < sizeof(header_type)) {
+            close(fd);
+            return false;
+        }
+        const size_t length = static_cast<size_t>(seek_off);
+
+        void * const addr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
+        if (addr == MAP_FAILED) {
+            close(fd);
+            return false;
+        }
+        char * const base = static_cast<char *>(addr);
+        const header_type & header = *reinterpret_cast<const header_type*>(base);
+
+        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())
+                or length != sizeof(header_type) + header.elements_size  + header.dat_size * m_double_array_data_trie.unit_size()) {
+            munmap(addr, length);
+            close(fd);
+            return false;
+        }
+
+        // Drop a mapping left by an earlier successful attach before replacing it.
+        ReleaseMapping();
+        m_mmap_fd = fd;
+        m_mmap_length = length;
+        m_mmap_addr = base;
+
+        m_elements_ptr = reinterpret_cast<const element_ptr_type *>(m_mmap_addr + sizeof(header_type));
+        const char * dat_ptr = m_mmap_addr + sizeof(header_type) + header.elements_size;
+        // set_array() is handed a pointer into the PROT_READ mapping.
+        m_double_array_data_trie.set_array(const_cast<char *>(dat_ptr), header.dat_size);
+        return true;
+    }
+
+    /** Unmaps the attached file and closes its descriptor; does nothing when nothing is attached. */
+    void ReleaseMapping()
+    {
+        if (m_mmap_addr != nullptr) {
+            munmap(m_mmap_addr, m_mmap_length);
+            m_mmap_addr = nullptr;
+        }
+        m_mmap_length = 0;
+        m_elements_ptr = nullptr;
+        if (m_mmap_fd >= 0) {
+            close(m_mmap_fd);
+            m_mmap_fd = -1;
+        }
+    }
+
+    vector<string> m_file_paths;
+    string m_dat_cache_path;
+
+#ifdef USE_DARTS
+    Darts::DoubleArray m_double_array_data_trie;
+#else
+    cedar::da<int, -1, -2, ordered> m_double_array_data_trie;
+#endif
+
+    const element_ptr_type * m_elements_ptr = nullptr;
+    int    m_mmap_fd = -1;
+    size_t    m_mmap_length = 0;
+    char * m_mmap_addr = nullptr;
+    int    m_total_dict_size = 0;
+
+};
+
+#endif // STORAGEBASE_H
--- a/libchinese-segmentation/test/main.cpp
+++ b/libchinese-segmentation/test/main.cpp
@ -0,0 +1,11 @@
+#include "mainwindow.h"
+
+#include <QApplication>
+
+// Entry point of the test tool: shows the main window and runs the Qt event loop.
+int main(int argc, char *argv[])
+{
+    QApplication application(argc, argv);
+
+    MainWindow window;
+    window.show();
+
+    // exec() blocks until the application quits; its result is the exit code.
+    const int exitCode = application.exec();
+    return exitCode;
+}
--- a/libchinese-segmentation/test/mainwindow.cpp
+++ b/libchinese-segmentation/test/mainwindow.cpp
@ -0,0 +1,92 @@
+#include "mainwindow.h"
+#include "ui_mainwindow.h"
+#include <HanZiToPinYin>
+#include <ChineseSegmentation>
+#include <QMenu>
+#include <QDebug>
+#include <QStringList>
+
+// Builds the generated UI, fills the style menu of the tool button, wires the
+// signal handlers and puts the keyboard focus on the input line edit.
+MainWindow::MainWindow(QWidget *parent)
+    : QMainWindow(parent)
+    , ui(new Ui::MainWindow)
+{
+    ui->setupUi(this);
+
+    // One menu entry per pinyin output style; initconnections() compares the entry text.
+    static const char * const kStyleNames[] = {"Default", "Tone", "Tone2", "Tone3", "FirstLetter"};
+    QMenu * const styleMenu = new QMenu(this);
+    for (const char * styleName : kStyleNames) {
+        styleMenu->addAction(styleName);
+    }
+    ui->toolButton->setMenu(styleMenu);
+
+    initconnections();
+    ui->lineEdit_2->setFocus();
+}
+
+MainWindow::~MainWindow() // Destructor: frees the ui object
+{
+    delete ui; // ui is a raw owning pointer, so it is freed by hand here
+}
+
+/**
+ * Connects the style menu and the start button to their handlers.
+ *
+ * - Picking a menu entry stores its text in m_action and shows it on the tool button.
+ * - Pressing the button configures HanZiToPinYin from the widgets, converts the
+ *   input text to pinyin (lineEdit_4) and segments it (lineEdit_6).
+ *
+ * Both lambdas capture only `this` and use it as the connection context, so the
+ * connections are dropped together with the window.
+ */
+void MainWindow::initconnections()
+{
+    connect(ui->toolButton->menu(), &QMenu::triggered, this, [this](QAction *action) {
+        qDebug() << "tool button:" << action->text();
+        m_action = action->text();
+        ui->toolButton->setText(action->text());
+    });
+    connect(ui->pushButton, &QPushButton::pressed, this, [this]() {
+        // m_action stays empty until a menu entry has been picked; start from
+        // Default so setConfig() never receives an uninitialised value.
+        PinyinDataStyle dataStyle = PinyinDataStyle::Default;
+        if (m_action == "Tone") {
+            dataStyle = PinyinDataStyle::Tone;
+        } else if (m_action == "Tone2") {
+            dataStyle = PinyinDataStyle::Tone2;
+        } else if (m_action == "Tone3") {
+            dataStyle = PinyinDataStyle::Tone3;
+        } else if (m_action == "FirstLetter") {
+            dataStyle = PinyinDataStyle::FirstLetter;
+        }
+
+        // checkSegBox is the "do not segment" box, hence the inverted mapping.
+        const SegType segType = ui->checkSegBox->isChecked()
+                ? SegType::NoSegmentation : SegType::Segmentation;
+        const PolyphoneType polyType = ui->checkPolyBox_2->isChecked()
+                ? PolyphoneType::Enable : PolyphoneType::Disable;
+        const ExDataProcessType exType = ui->checkExBox_3->isChecked()
+                ? ExDataProcessType::Default : ExDataProcessType::Delete;
+
+        HanZiToPinYin::getInstance()->setConfig(dataStyle, segType, polyType, exType);
+
+        ui->lineEdit_4->clear();
+        const QString text = ui->lineEdit_2->text();
+        qDebug() << "input:" << text;
+
+        QStringList list;
+        HanZiToPinYin::getInstance()->getResults(text.toStdString(), list);
+
+        ui->lineEdit_4->setText(list.join(" "));
+        qDebug() << "result:" << list.join(" ");
+
+        const auto segments = ChineseSegmentation::getInstance()->callSegment(text.toStdString());
+
+        list.clear();
+        for (const auto &info : segments) {
+            // fromStdString is static; no temporary QString is needed to call it.
+            list.append(QString::fromStdString(info.word));
+        }
+        ui->lineEdit_6->setText(list.join("/"));
+    });
+}
+
--- a/libchinese-segmentation/test/mainwindow.h
+++ b/libchinese-segmentation/test/mainwindow.h
@ -0,0 +1,23 @@
+#ifndef MAINWINDOW_H
+#define MAINWINDOW_H
+
+#include <QtWidgets>
+
+QT_BEGIN_NAMESPACE
+namespace Ui { class MainWindow; }
+QT_END_NAMESPACE
+
+class MainWindow : public QMainWindow // Main window class of this test program
+{
+    Q_OBJECT
+
+public:
+    MainWindow(QWidget *parent = nullptr); // parent defaults to nullptr
+    ~MainWindow();
+
+private:
+    void initconnections();
+    Ui::MainWindow *ui; // pointer to the Ui::MainWindow class forward-declared in the Ui namespace
+    QString m_action;
+};
+#endif // MAINWINDOW_H
--- a/libchinese-segmentation/test/mainwindow.ui
+++ b/libchinese-segmentation/test/mainwindow.ui
@ -0,0 +1,181 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>MainWindow</class>
+ <widget class="QMainWindow" name="MainWindow">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>800</width>
+    <height>600</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>MainWindow</string>
+  </property>
+  <widget class="QWidget" name="centralwidget">
+   <widget class="QPushButton" name="pushButton">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>440</y>
+      <width>191</width>
+      <height>81</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>点击开始</string>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>20</y>
+      <width>91</width>
+      <height>31</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>输入文字：</string>
+    </property>
+    <property name="readOnly">
+     <bool>true</bool>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit_2">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>70</y>
+      <width>711</width>
+      <height>41</height>
+     </rect>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit_3">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>310</y>
+      <width>121</width>
+      <height>31</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>拼音转换结果：</string>
+    </property>
+    <property name="readOnly">
+     <bool>true</bool>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit_4">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>360</y>
+      <width>711</width>
+      <height>41</height>
+     </rect>
+    </property>
+    <property name="readOnly">
+     <bool>true</bool>
+    </property>
+   </widget>
+   <widget class="QCheckBox" name="checkSegBox">
+    <property name="geometry">
+     <rect>
+      <x>280</x>
+      <y>430</y>
+      <width>111</width>
+      <height>29</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>不启用分词</string>
+    </property>
+   </widget>
+   <widget class="QCheckBox" name="checkPolyBox_2">
+    <property name="geometry">
+     <rect>
+      <x>280</x>
+      <y>470</y>
+      <width>131</width>
+      <height>29</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>启用多音字</string>
+    </property>
+   </widget>
+   <widget class="QToolButton" name="toolButton">
+    <property name="geometry">
+     <rect>
+      <x>530</x>
+      <y>460</y>
+      <width>181</width>
+      <height>30</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>数据形式...</string>
+    </property>
+    <property name="popupMode">
+     <enum>QToolButton::MenuButtonPopup</enum>
+    </property>
+    <property name="autoRaise">
+     <bool>false</bool>
+    </property>
+   </widget>
+   <widget class="QCheckBox" name="checkExBox_3">
+    <property name="geometry">
+     <rect>
+      <x>280</x>
+      <y>510</y>
+      <width>181</width>
+      <height>29</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>无拼音数据原数据返回</string>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit_5">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>160</y>
+      <width>113</width>
+      <height>31</height>
+     </rect>
+    </property>
+    <property name="text">
+     <string>分词结果：</string>
+    </property>
+   </widget>
+   <widget class="QLineEdit" name="lineEdit_6">
+    <property name="geometry">
+     <rect>
+      <x>40</x>
+      <y>220</y>
+      <width>711</width>
+      <height>41</height>
+     </rect>
+    </property>
+   </widget>
+  </widget>
+  <widget class="QMenuBar" name="menubar">
+   <property name="geometry">
+    <rect>
+     <x>0</x>
+     <y>0</y>
+     <width>800</width>
+     <height>28</height>
+    </rect>
+   </property>
+  </widget>
+  <widget class="QStatusBar" name="statusbar"/>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
--- a/libchinese-segmentation/test/test.pro
+++ b/libchinese-segmentation/test/test.pro
@ -0,0 +1,26 @@
+QT += core gui
+greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
+CONFIG += c++11 link_pkgconfig
+
+PKGCONFIG += chinese-segmentation
+
+# The following define makes your compiler emit warnings if you use
+# any Qt feature that has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if it uses deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+HEADERS += \
+        mainwindow.h
+
+SOURCES += \
+        main.cpp \
+        mainwindow.cpp
+
+FORMS += \
+    mainwindow.ui
--- a/libfriso/friso-interface.c
+++ b/libfriso/friso-interface.c
@ -1,161 +0,0 @@
-/*
- * Friso test program.
- *     Of course you can make it a perfect demo for friso.
- * All threads or processes share the same friso_t,
- *     different threads/processes use different friso_task_t.
- * And you could share the friso_config_t if you wish...
- *
- * @author lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso-interface.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#define __LENGTH__ 15
-#define __INPUT_LENGTH__ 20480
-#define ___EXIT_INFO___                    \
-    println("Thanks for trying friso.");        \
-break;
-
-#define ___ABOUT___                    \
-    println("+---------------------------------------------------------------+");    \
-    println("| Friso - a Chinese word segmentation writen by c.              |");    \
-    println("| bug report email - chenxin619315@gmail.com.                   |");    \
-    println("| or: visit https://github.com/lionsoul2014/friso.              |");    \
-    println("|     java version for https://github.com/lionsoul2014/jcseg    |");    \
-    println("| type 'quit' to exit the program.                              |");    \
-    println("+---------------------------------------------------------------+");
-
-//read a line from a command line.
-static fstring getLine(FILE *fp, fstring __dst) { // Copies characters from fp into __dst up to a newline or EOF; the newline is not stored
-    register int c;
-    register fstring cs;
-
-    cs = __dst;
-    while((c = getc(fp)) != EOF) {
-        if(c == '\n') break;
-        *cs++ = c; // NOTE(review): no capacity check - the caller's buffer must be large enough for the whole line
-    }
-    *cs = '\0';
-
-    return (c == EOF && cs == __dst) ? NULL : __dst; // NULL only when EOF was hit before any character was stored
-}
-
-/*static void printcode( fstring str ) {
-  int i,length;
-  length = strlen( str );
-  printf("str:length=%d\n", length );
-  for ( i = 0; i < length; i++ ) {
-  printf("%d ", str[i] );
-  }
-  putchar('\n');
-  }*/
-
-//int friso_test(int argc, char **argv)
-int friso_test() { // Interactive demo: loads friso from a fixed ini path, then segments every line read from stdin until "quit"
-
-    clock_t s_time, e_time;
-    char line[__INPUT_LENGTH__] = {0};
-    int i; // only referenced by the commented-out loops below
-    fstring __path__ = NULL, mode = NULL;
-
-    friso_t friso;
-    friso_config_t config;
-    friso_task_t task;
-
-    // get the lexicon directory from command line arguments
-//    for ( i = 0; i < argc; i++ ) {
-//        if ( strcasecmp( "-init", argv[i] ) == 0 ) {
-//            __path__ = argv[i+1];
-//        }
-//    }
-    __path__ = "/usr/share/ukui-search/res/friso.ini";
-
-    if(__path__ == NULL) { // cannot be true here: __path__ was assigned a literal just above
-        println("Usage: friso -init lexicon path");
-        exit(0);
-    }
-
-    s_time = clock();
-
-    //initialize
-    friso  = friso_new();
-    config = friso_new_config();
-    /*friso_dic_t dic = friso_dic_new();
-      friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
-      friso_set_dic( friso, dic );
-      friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
-    if(friso_init_from_ifile(friso, config, __path__) != 1) {
-        printf("fail to initialize friso and config.\n");
-        goto err; // task has not been created yet, so the err block frees only config and friso
-    }
-
-    switch(config->mode) { // mode stays NULL when config->mode matches none of the three cases
-    case __FRISO_SIMPLE_MODE__:
-        mode = "Simple";
-        break;
-    case __FRISO_COMPLEX_MODE__:
-        mode = "Complex";
-        break;
-    case __FRISO_DETECT_MODE__:
-        mode = "Detect";
-        break;
-    }
-
-    //friso_set_mode( config, __FRISO_DETECT_MODE__ );
-    //printf("clr_stw=%d\n", friso->clr_stw);
-    //printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" ));
-    //printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究"));
-
-    e_time = clock();
-
-    printf("Initialized in %fsec\n", (double)(e_time - s_time) / CLOCKS_PER_SEC);
-    printf("Mode: %s\n", mode);
-    printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK");
-    ___ABOUT___;
-
-    //set the task.
-    task = friso_new_task();
-
-    while(1) {
-        print("friso>> ");
-        getLine(stdin, line); // NOTE(review): the result is ignored, so EOF on stdin does not end this loop
-        //exit the programe
-        if(strcasecmp(line, "quit") == 0) {
-            ___EXIT_INFO___
-        }
-
-        //for ( i = 0; i < 1000000; i++ ) {
-        //set the task text.
-        friso_set_text(task, line);
-        println("分词结果:");
-
-        s_time = clock();
-        while((config->next_token(friso, config, task)) != NULL) {
-            printf(
-                "%s[%d, %d, %d] ",
-                task->token->word,
-                task->token->offset,
-                task->token->length,
-                task->token->rlen
-            );
-            // printf("%s ", task->token->word);
-        }
-        //}
-        e_time = clock();
-        printf("\nDone, cost < %fsec\n", ((double)(e_time - s_time)) / CLOCKS_PER_SEC);
-
-    }
-
-    friso_free_task(task);
-
-    //error block.
-err:
-    friso_free_config(config);
-    friso_free(friso);
-
-    return 0; // 0 is returned on the error path as well
-}
--- a/libfriso/friso-interface.h
+++ b/libfriso/friso-interface.h
@ -1,10 +0,0 @@
-/*
- * temporary use friso.ini, it should be removed in the future.
- * MouseZhangZh
-*/
-#include "friso/src/friso_API.h"
-#include "friso/src/friso.h"
-#include "friso/src/friso_ctype.h"
-
-//int friso_test(int argc, char **argv);
-int friso_test();
--- a/libfriso/friso/LICENSE.md
+++ b/libfriso/friso/LICENSE.md
@ -1,225 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-==========================================================================
-The following license applies to the Friso ANSI C library
--------------------------------------------------------------------------
-Copyright (c) 2010 lionsoul<chenxin619315@gmail.com>
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/libfriso/friso/friso.ini
+++ b/libfriso/friso/friso.ini
@ -1,68 +0,0 @@
-# friso configuration file.
-#	do not change the name of the left key.
-# @email	chenxin619315@gmail.com
-# @date		2012-12-20
-#
-
-# charset, only UTF8 and GBK support.
-# set it with UTF8(0) or GBK(1)
-friso.charset = 0
-
-# lexicon directory absolute path.
-#	the value must end with '/'
-# this will tell friso how to find friso.lex.ini configuration file and all the lexicon files.
-#
-# if it is not start with '/' for linux, or matches no ':' for winnt in its value 
-#	friso will search the friso.lex.ini relative to friso.ini
-# absolute path search:
-# linux:	friso.lex_dir = /c/products/friso/dict/UTF-8/
-# Winnt:	friso.lex_dir = D:/products/friso/dict/UTF-8/
-# relative path search (All system)
-friso.lex_dir = ./dict/UTF-8/
-
-# the maximum matching length.
-friso.max_len = 5
-
-# 1 for recognition chinese name.
-#	and 0 for closed it.
-friso.r_name = 1
-
-# the maximum length for the cjk words in a
-#	chinese and english mixed word.
-friso.mix_len = 2
-
-# the maxinum length for the chinese last name adron.
-friso.lna_len = 1
-
-# append the synonyms words
-friso.add_syn = 1
-
-# clear the stopwords or not (1 to open it and 0 to close it)
-# @date 2013-06-13
-friso.clr_stw = 0
-
-# keep the unrecongized words or not (1 to open it and 0 to close it)
-# @date 2013-06-13
-friso.keep_urec = 0
-
-# use sphinx output style like 'admire|love|enjoy einsten'
-# @date 2013-10-25
-friso.spx_out = 0
-
-# start the secondary segmentation for complex english token.
-friso.en_sseg = 1
-
-# min length of the secondary segmentation token. (better larger than 1)
-friso.st_minl = 2
-
-# default keep punctuations for english token.
-friso.kpuncs = @%.#&+
-
-# the threshold value for a char not a part of a chinese name.
-friso.nthreshold = 2000000
-
-# default mode for friso.
-# 1 : simple mode - simply maxmum matching algorithm.
-# 2 : complex mode - four rules of mmseg alogrithm.
-# 3 : detect mode - only return the words that the do exists in the lexicon
-friso.mode = 2
--- a/libfriso/friso/friso.pri
+++ b/libfriso/friso/friso.pri
@ -1,18 +0,0 @@
-INCLUDEPATH += $$PWD
-
-HEADERS += \
-    $$PWD/src/friso_API.h \
-    $$PWD/src/friso.h \
-    $$PWD/src/friso_ctype.h
-
-SOURCES += \
-    $$PWD/src/friso.c \
-    $$PWD/src/friso_lexicon.c \
-    $$PWD/src/friso_string.c \
-    $$PWD/src/friso_array.c \
-    $$PWD/src/friso_ctype.c \
-    $$PWD/src/friso_GBK.c \
-    $$PWD/src/friso_hash.c \
-    $$PWD/src/friso_link.c \
-    $$PWD/src/friso_UTF8.c
-
--- a/libfriso/friso/src/friso.c
+++ b/libfriso/friso/src/friso.c
--- a/libfriso/friso/src/friso.h
+++ b/libfriso/friso/src/friso.h
@ -1,370 +0,0 @@
-/*
- * main interface file for friso tokenizer.
- * you could modify it and re-release and free for commercial use.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#ifndef _friso_h
-#define _friso_h
-
-#include "friso_API.h"
-#include <stdio.h>
-
-/* {{{ friso main interface define :: start*/
-#define FRISO_VERSION "1.6.4"
-#define friso_version() FRISO_VERSION
-
-
-#define DEFAULT_SEGMENT_LENGTH     5
-#define DEFAULT_MIX_LENGTH    2
-#define DEFAULT_LNA_LENGTH     1
-#define DEFAULT_NTHRESHOLD     1000000
-#define DEFAULT_SEGMENT_MODE     2
-
-/*
- * Type: friso_lex_t
- * -----------
- * This type used to represent the type of the lexicon.
- */
-typedef enum {
-    __LEX_CJK_WORDS__ = 0,
-    __LEX_CJK_UNITS__ = 1,
-    __LEX_ECM_WORDS__ = 2,    //english and chinese mixed words.
-    __LEX_CEM_WORDS__ = 3,    //chinese and english mixed words.
-    __LEX_CN_LNAME__ = 4,
-    __LEX_CN_SNAME__ = 5,
-    __LEX_CN_DNAME1__ = 6,
-    __LEX_CN_DNAME2__ = 7,
-    __LEX_CN_LNA__ = 8,
-    __LEX_STOPWORDS__ = 9,
-    __LEX_ENPUN_WORDS__ = 10,
-    __LEX_EN_WORDS__ = 11,
-    __LEX_OTHER_WORDS__ = 15,
-    __LEX_NCSYN_WORDS__ = 16,
-    __LEX_PUNC_WORDS__ = 17,        //punctuations
-    __LEX_UNKNOW_WORDS__ = 18        //unrecognized words.
-} friso_lex_t;
-
-typedef friso_hash_t * friso_dic_t;
-#define __FRISO_LEXICON_LENGTH__ 12
-
-
-//charset that Friso now support.
-typedef enum {
-    FRISO_UTF8    = 0,        //UTF-8
-    FRISO_GBK    = 1        //GBK
-} friso_charset_t;
-
-/*
- * Type: friso_mode_t
- * ------------------
- * use to identidy the mode that the friso use.
- */
-typedef enum {
-    __FRISO_SIMPLE_MODE__   = 1,
-    __FRISO_COMPLEX_MODE__  = 2,
-    __FRISO_DETECT_MODE__   = 3
-} friso_mode_t;
-
-/* friso entry.*/
-typedef struct {
-    friso_dic_t dic;        //friso dictionary
-    friso_charset_t charset;    //project charset.
-} friso_entry;
-typedef friso_entry * friso_t;
-
-
-
-/*
- * Type: lex_entry_cdt
- * -------------------
- * This type used to represent the lexicon entry struct.
- */
-#define _LEX_APPENSYN_MASK (1 << 0)    //append synoyums words.
-#define lex_appensyn_open(e)    e->ctrlMask |= _LEX_APPENSYN_MASK
-#define lex_appensyn_close(e)   e->ctrlMask &= ~_LEX_APPENSYN_MASK
-#define lex_appensyn_check(e)   ((e->ctrlMask & _LEX_APPENSYN_MASK) != 0)
-typedef struct {
-    /*
-     * the type of the lexicon item.
-     * available value is all the elements in friso_lex_t enum.
-     *    and if it is __LEX_OTHER_WORDS__, we need to free it after use it.
-     */
-    uchar_t length;     //the length of the token.(after the convertor of Friso.)
-    uchar_t rlen;       //the real length of the token.(before any convert)
-    uchar_t type;
-    uchar_t ctrlMask;   //function control mask, like append the synoyums words.
-    uint_t offset;      //offset index.
-    fstring word;
-    //fstring py;       //pinyin of the word.(invalid)
-    friso_array_t syn;  //synoyums words.
-    friso_array_t pos;  //part of speech.
-    uint_t fre;         //single word frequency.
-} lex_entry_cdt;
-typedef lex_entry_cdt * lex_entry_t;
-
-
-/*the segmentation token entry.*/
-#define __HITS_WORD_LENGTH__ 64
-
-typedef struct {
-    uchar_t type;    //type of the word. (item of friso_lex_t)
-    uchar_t length;  //length of the token.
-    uchar_t rlen;    //the real length of the token.(in orgin string)
-    char pos;        //part of speech.
-    int offset;      //start offset of the word.
-    char word[__HITS_WORD_LENGTH__];
-    //char py[0];
-} friso_token_entry;
-typedef friso_token_entry * friso_token_t;
-
-
-/*
- * Type: friso_task_entry
- * This type used to represent the current segmentation content.
- *         like the text to split, and the current index, token buffer eg....
- */
-//action control mask for #FRISO_TASK_T#.
-#define _TASK_CHECK_CF_MASK (1 << 0)     //Wether to check the chinese fraction.
-#define _TASK_START_SS_MASK (1 << 1)    //Wether to start the secondary segmentation.
-#define task_ssseg_open(task)   task->ctrlMask |= _TASK_START_SS_MASK
-#define task_ssseg_close(task)  task->ctrlMask &= ~_TASK_START_SS_MASK
-#define task_ssseg_check(task)  ((task->ctrlMask & _TASK_START_SS_MASK) != 0)
-typedef struct {
-    fstring text;           //text to tokenize
-    uint_t idx;             //start offset index.
-    uint_t length;          //length of the text.
-    uint_t bytes;           //latest word bytes in C.
-    uint_t unicode;         //latest word unicode number.
-    uint_t ctrlMask;        //action control mask.
-    friso_link_t pool;      //task pool.
-    string_buffer_t sbuf;   //string buffer.
-    friso_token_t token;    //token result token;
-    char buffer[7];         //word buffer. (1-6 bytes for an utf-8 word in C).
-} friso_task_entry;
-typedef friso_task_entry * friso_task_t;
-
-
-/* task configuration entry.*/
-#define _FRISO_KEEP_PUNC_LEN 13
-#define friso_en_kpunc(config, ch) (strchr(config->kpuncs, ch) != 0)
-//typedef friso_token_t ( * friso_next_hit_fn ) ( friso_t, void *, friso_task_t );
-//typedef lex_entry_t  ( * friso_next_lex_fn ) ( friso_t, void *, friso_task_t );
-struct friso_config_struct {
-    ushort_t max_len;            //the max match length (4 - 7).
-    ushort_t r_name;            //1 for open chinese name recognition 0 for close it.
-    ushort_t mix_len;            //the max length for the CJK words in a mix string.
-    ushort_t lna_len;            //the max length for the chinese last name adron.
-    ushort_t add_syn;            //append synonyms tokenizer words.
-    ushort_t clr_stw;            //clear the stopwords.
-    ushort_t keep_urec;         //keep the unrecongnized words.
-    ushort_t spx_out;            //use sphinx output customize.
-    ushort_t en_sseg;            //start the secondary segmentation.
-    ushort_t st_minl;            //min length of the secondary segmentation token.
-    uint_t nthreshold;            //the threshold value for a char to make up a chinese name.
-    friso_mode_t mode;            //Complex mode or simple mode
-
-    //pointer to the function to get the next token
-    friso_token_t (*next_token)(friso_t, struct friso_config_struct *, friso_task_t);
-    //pointer to the function to get the next cjk lex_entry_t
-    lex_entry_t (*next_cjk)(friso_t, struct friso_config_struct *, friso_task_t);
-
-    char kpuncs[_FRISO_KEEP_PUNC_LEN]; //keep punctuations buffer.
-};
-typedef struct friso_config_struct friso_config_entry;
-typedef friso_config_entry * friso_config_t;
-
-
-
-/*
- * Function: friso_new;
- * Usage: vars = friso_new( void );
- * --------------------------------
- * This function used to create a new empty friso friso_t;
- *        with default value.
- */
-FRISO_API friso_t friso_new(void);
-
-//creat a friso entry with a default value from a configuratile file.
-//@return 1 for successfully and 0 for failed.
-FRISO_API int friso_init_from_ifile(friso_t, friso_config_t, fstring);
-
-/*
- * Function: friso_free_vars;
- * Usage: friso_free( vars );
- * --------------------------
- * This function is used to free the allocation of the given vars.
- */
-FRISO_API void friso_free(friso_t);
-
-/*
- * Function: friso_set_dic
- * Usage: dic = friso_set_dic( vars, dic );
- * ----------------------------------------
- * This function is used to set the dictionary for friso.
- *         and firso_dic_t is the pointer of a hash table array.
- */
-//FRISO_API void friso_set_dic( friso_t, friso_dic_t );
-#define friso_set_dic(friso, dic)\
-do {\
-    friso->dic = dic;\
-} while (0)
-
-/*
- * Function: friso_set_mode
- * Usage: friso_set_mode( vars, mode );
- * ------------------------------------
- * This function is used to set the mode(complex or simple) that you want to friso to use.
- */
-FRISO_API void friso_set_mode(friso_config_t, friso_mode_t);
-
-/*create a new friso configuration entry and initialize
-  it with the default value.*/
-FRISO_API friso_config_t friso_new_config(void);
-
-//initialize the specified friso config entry with default value.
-FRISO_API void friso_init_config(friso_config_t);
-
-//free the specified friso configuration entry.
-//FRISO_API void friso_free_config( friso_config_t );
-#define friso_free_config(cfg) FRISO_FREE(cfg)
-
-/*
- * Function: friso_new_task;
- * Usage: segment = friso_new_task( void );
- * ----------------------------------------
- * This function is used to create a new friso segment type;
- */
-FRISO_API friso_task_t friso_new_task(void);
-
-/*
- * Function: friso_free_task;
- * Usage: friso_free_task( task );
- * -------------------------------
- * This function is used to free the allocation of function friso_new_segment();
- */
-FRISO_API void friso_free_task(friso_task_t);
-
-//create a new friso token
-FRISO_API friso_token_t friso_new_token(void);
-
-//free the given friso token
-//FRISO_API void friso_free_token( friso_token_t );
-#define friso_free_token(token) FRISO_FREE(token)
-
-/*
- * Function: friso_set_text
- * Usage: friso_set_text( task, text );
- * ------------------------------------
- * This function is used to set the text that is going to segment.
- */
-FRISO_API void friso_set_text(friso_task_t, fstring);
-
-
-//get the next cjk word with mmseg simple mode
-FRISO_API lex_entry_t next_simple_cjk(friso_t, friso_config_t, friso_task_t);
-
-//get the next cjk word with mmseg complex mode(mmseg core algorithm)
-FRISO_API lex_entry_t next_complex_cjk(friso_t, friso_config_t, friso_task_t);
-
-/*
- * Function: next_mmseg_token
- * Usage: word = next_mmseg_token( vars, seg );
- * --------------------------------------
- * This function is used to get next word that friso segmented
- *     with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__
- */
-FRISO_API friso_token_t next_mmseg_token(friso_t, friso_config_t, friso_task_t);
-
-//__FRISO_DETECT_MODE__
-FRISO_API friso_token_t next_detect_token(friso_t, friso_config_t, friso_task_t);
-/* }}} friso main interface define :: end*/
-
-/* {{{ lexicon interface define :: start*/
-
-/*
- * Function: friso_dic_new
- * Usage: dic = friso_new_dic();
- * -----------------------------
- * This function used to create a new dictionary.(memory allocation).
- */
-FRISO_API friso_dic_t friso_dic_new(void);
-
-FRISO_API fstring file_get_line(fstring, FILE *);
-
-/*
- * Function: friso_dic_free
- * Usage: friso_dic_free( void );
- * ------------------------------
- * This function is used to free all the allocation of friso_dic_new.
- */
-FRISO_API void friso_dic_free(friso_dic_t);
-
-//create a new lexicon entry.
-FRISO_API lex_entry_t new_lex_entry(fstring, friso_array_t, uint_t, uint_t, uint_t);
-
-//free the given lexicon entry.
-//free all the allocations that its synonyms word's items pointed to
-//when the second arguments is 1
-FRISO_API void free_lex_entry_full(lex_entry_t);
-FRISO_API void free_lex_entry(lex_entry_t);
-
-/*
- * Function: friso_dic_load
- * Usage: friso_dic_load( friso, friso_lex_t, path, length );
- * --------------------------------------------------
- * This function is used to load dictionary from a given path.
- *         no length limit when length less than 0.
- */
-FRISO_API void friso_dic_load(friso_t, friso_config_t,
-                              friso_lex_t, fstring, uint_t);
-
-/*
- * load the lexicon configuration file.
- *    and load all the valid lexicon from the conf file.
- */
-FRISO_API void friso_dic_load_from_ifile(friso_t, friso_config_t, fstring, uint_t);
-
-/*
- * Function: friso_dic_match
- * Usage: friso_dic_add( dic, friso_lex_t, word, syn );
- * ----------------------------------------------
- * This function used to put new word into the dictionary.
- */
-FRISO_API void friso_dic_add(friso_dic_t, friso_lex_t, fstring, friso_array_t);
-
-/*
- * Function: friso_dic_add_with_fre
- * Usage: friso_dic_add_with_fre( dic, friso_lex_t, word, value, syn, fre );
- * -------------------------------------------------------------------
- * This function used to put new word width frequency into the dictionary.
- */
-FRISO_API void friso_dic_add_with_fre(friso_dic_t, friso_lex_t, fstring, friso_array_t, uint_t);
-
-/*
- * Function: friso_dic_match
- * Usage: result = friso_dic_match( dic, friso_lex_t, word );
- * ----------------------------------------------------
- * This function is used to check the given word is in the dictionary or not.
- */
-FRISO_API int friso_dic_match(friso_dic_t, friso_lex_t, fstring);
-
-/*
- * Function: friso_dic_get
- * Usage: friso_dic_get( dic, friso_lex_t, word );
- * -----------------------------------------
- * This function is used to search the specified lex_entry_t.
- */
-FRISO_API lex_entry_t friso_dic_get(friso_dic_t, friso_lex_t, fstring);
-
-/*
- * Function: friso_spec_dic_size
- * Usage: friso_spec_dic_size( dic, friso_lex_t )
- * This function is used to get the size of the dictionary with a specified type.
- */
-FRISO_API uint_t friso_spec_dic_size(friso_dic_t, friso_lex_t);
-FRISO_API uint_t friso_all_dic_size(friso_dic_t);
-/* }}} lexicon interface define :: end*/
-
-#endif /*end ifndef*/
--- a/libfriso/friso/src/friso_API.h
+++ b/libfriso/friso/src/friso_API.h
@ -1,412 +0,0 @@
-/*
- * friso ADT application interface header source file.
- * 1. string bufffer interface.
- * 2. hashmap interface.
- * 3. dynamaic array interface.
- * 4. double link list interface.
- *
- * @author chenxin <chenxin619315@gmail.com>
- */
-
-#ifndef _friso_api_h
-#define _friso_api_h
-
-#include <stdio.h>
-#include <stdlib.h>
-
-//yat, just take it as this way, 99 percent you will find no problem
-#if ( defined(_WIN32) || defined(_WINDOWS_) || defined(__WINDOWS_) )
-#    define FRISO_WINNT
-#else
-#    define FRISO_LINUX
-#endif
-
-#ifdef FRISO_WINNT
-#    define FRISO_API extern __declspec(dllexport)
-#    define __STATIC_API__ static
-#else
-/*platform shared library statement :: unix*/
-#    define FRISO_API extern
-#    define __STATIC_API__ static inline
-#endif
-
-#define ___ALLOCATION_ERROR___                             \
-    printf("Unable to do the memory allocation, program will now exit\n" );    \
-exit(1);
-
-#define print(str) printf("%s", str )
-#define println(str) printf("%s\n", str )
-
-/*
- * memory allocation macro definition which make it more more convenient
- * to change to use your favorite or a better memory manage library.
- */
-#define FRISO_CALLOC(_bytes, _blocks) calloc(_bytes, _blocks)
-#define FRISO_MALLOC(_bytes) malloc(_bytes)
-#define FRISO_FREE(_ptr) free( _ptr )
-
-typedef unsigned short ushort_t;
-typedef unsigned char uchar_t;
-typedef unsigned int uint_t;
-typedef char * fstring;
-
-
-
-
-/* {{{ fstring handle interface define::start. */
-#define __CHAR_BYTES__ 8
-#define __BUFFER_DEFAULT_LENGTH__ 16
-
-typedef struct {
-    fstring buffer;
-    uint_t length;
-    uint_t allocs;
-} string_buffer_entry;
-
-typedef string_buffer_entry * string_buffer_t;
-
-//FRISO_API string_buffer_t new_string_buffer( void );
-#define new_string_buffer() \
-    new_string_buffer_with_opacity( __DEFAULT_ARRAY_LIST_OPACITY__ );
-FRISO_API string_buffer_t new_string_buffer_with_opacity(uint_t);
-FRISO_API string_buffer_t new_string_buffer_with_string(fstring str);
-
-/*
- * this function will copy the chars that the fstring pointed.
- *        to the buffer.
- * this may cause the resize action of the buffer.
- */
-FRISO_API void string_buffer_append(string_buffer_t, fstring);
-FRISO_API void string_buffer_append_char(string_buffer_t, char);
-
-//insert the given fstring from the specified position.
-FRISO_API void string_buffer_insert(string_buffer_t, uint_t idx, fstring);
-
-//remove the char in the specified position.
-FRISO_API fstring string_buffer_remove(string_buffer_t, uint_t idx, uint_t);
-
-/*
- * turn the string_buffer to a string.
- *        or return the buffer of the string_buffer.
- */
-FRISO_API string_buffer_t string_buffer_trim(string_buffer_t);
-
-/*
- * free the given fstring buffer.
- *        and this function will not free the allocations of the
- *        the string_buffer_t->buffer, we return it to you, if there is
- *     a necessary you could free it youself by calling free();
- */
-FRISO_API fstring string_buffer_devote(string_buffer_t);
-
-/*
- * clear the given fstring buffer.
- *        reset its buffer with 0 and reset its length to 0.
- */
-FRISO_API void string_buffer_clear(string_buffer_t);
-
-//free the fstring buffer include the buffer.
-FRISO_API void free_string_buffer(string_buffer_t);
-
-/**
- * fstring specified chars tokenizer functions
- *
- * @date 2013-06-08
- */
-typedef struct {
-    fstring source;
-    uint_t srcLen;
-    fstring delimiter;
-    uint_t delLen;
-    uint_t idx;
-} string_split_entry;
-typedef string_split_entry * string_split_t;
-
-/**
- * create a new string_split_entry.
- *
- * @param    source
- * @return    string_split_t;
- */
-FRISO_API string_split_t new_string_split(fstring, fstring);
-
-FRISO_API void string_split_reset(string_split_t, fstring, fstring);
-
-FRISO_API void string_split_set_source(string_split_t, fstring);
-
-FRISO_API void string_split_set_delimiter(string_split_t, fstring);
-
-FRISO_API void free_string_split(string_split_t);
-
-/**
- * get the next split fstring, and copy the
- *     splited fstring into the __dst buffer .
- *
- * @param    string_split_t
- * @param    __dst
- * @return    fstring (NULL if reach the end of the source
- *         or there is no more segmentation)
- */
-FRISO_API fstring string_split_next(string_split_t, fstring);
-/* }}} */
-
-
-
-
-/* {{{ dynamaic array interface define::start*/
-#define __DEFAULT_ARRAY_LIST_OPACITY__ 8
-
-/*friso array list entry struct*/
-typedef struct {
-    void **items;
-    uint_t allocs;
-    uint_t length;
-} friso_array_entry;
-
-typedef friso_array_entry * friso_array_t;
-
-//create a new friso dynamic array.
-//FRISO_API friso_array_t new_array_list( void );
-#define new_array_list() new_array_list_with_opacity(__DEFAULT_ARRAY_LIST_OPACITY__)
-
-//create a new friso dynamic array with the given opacity
-FRISO_API friso_array_t new_array_list_with_opacity(uint_t);
-
-/*
- * free the given friso array.
- *     and its items, but never where the items's item to pointed to .
- */
-FRISO_API void free_array_list(friso_array_t);
-
-//add a new item to the array.
-FRISO_API void array_list_add(friso_array_t, void *);
-
-//insert a new item at a specifed position.
-FRISO_API void array_list_insert(friso_array_t, uint_t, void *);
-
-//get a item at a specified position.
-FRISO_API void *array_list_get(friso_array_t, uint_t);
-
-/*
- * set the item at a specified position.
- *     this will return the old value.
- */
-FRISO_API void *array_list_set(friso_array_t, uint_t, void *);
-
-/*
- * remove the given item at a specified position.
- *    this will return the value of the removed item.
- */
-FRISO_API void *array_list_remove(friso_array_t, uint_t);
-
-/*trim the array list for final use.*/
-FRISO_API friso_array_t array_list_trim(friso_array_t);
-
-/*
- * clear the array list.
- *     this function will free all the allocations that the pointer pointed.
- *        but will not free the point array allocations,
- *        and will reset the length of it.
- */
-FRISO_API friso_array_t array_list_clear(friso_array_t);
-
-//return the size of the array.
-//FRISO_API uint_t array_list_size( friso_array_t );
-#define array_list_size( array ) array->length
-
-//return the allocations of the array.
-//FRISO_API uint_t array_list_allocs( friso_array_t );
-#define array_list_allocs( array ) array->allocs
-
-//check if the array is empty.
-//FRISO_API int array_list_empty( friso_array_t );
-#define array_list_empty( array ) ( array->length == 0 )
-/* }}} dynamaic array interface define::end*/
-
-
-
-
-/* {{{ link list interface define::start*/
-struct friso_link_node {
-    void *value;
-    struct friso_link_node *prev;
-    struct friso_link_node *next;
-};
-typedef struct friso_link_node link_node_entry;
-typedef link_node_entry * link_node_t;
-
-/*
- * link list adt
- */
-typedef struct {
-    link_node_t head;
-    link_node_t tail;
-    uint_t size;
-} friso_link_entry;
-
-typedef friso_link_entry * friso_link_t;
-
-//create a new link list
-FRISO_API friso_link_t new_link_list(void);
-
-//free the specified link list
-FRISO_API void free_link_list(friso_link_t);
-
-//return the size of the current link list.
-//FRISO_API uint_t link_list_size( friso_link_t );
-#define link_list_size( link ) link->size
-
-//check the given link is empty or not.
-//FRISO_API int link_list_empty( friso_link_t );
-#define link_list_empty( link ) (link->size == 0)
-
-//clear all the nodes in the link list( except the head and the tail ).
-FRISO_API friso_link_t link_list_clear(friso_link_t link);
-
-//add a new node to the link list.(append from the tail)
-FRISO_API void link_list_add(friso_link_t, void *);
-
-//add a new node before the specified node
-FRISO_API void link_list_insert_before(friso_link_t, uint_t, void *);
-
-//get the node in the current index.
-FRISO_API void *link_list_get(friso_link_t, uint_t);
-
-//modify the node in the current index.
-FRISO_API void *link_list_set(friso_link_t, uint_t, void *);
-
-//remove the specified link node
-FRISO_API void *link_list_remove(friso_link_t, uint_t);
-
-//remove the given node
-FRISO_API void *link_list_remove_node(friso_link_t, link_node_t);
-
-//remove the node from the frist.
-FRISO_API void *link_list_remove_first(friso_link_t);
-
-//remove the last node from the link list
-FRISO_API void *link_list_remove_last(friso_link_t);
-
-//append a node from the end.
-FRISO_API void link_list_add_last(friso_link_t, void *);
-
-//add a node at the begining of the link list.
-FRISO_API void link_list_add_first(friso_link_t, void *);
-/* }}} link list interface define::end*/
-
-
-
-
-/* {{{ hashtable interface define :: start*/
-struct hash_entry {
-    fstring _key;                    //the node key
-    void * _val;                    //the node value
-    struct hash_entry * _next;
-};
-typedef struct hash_entry friso_hash_entry;
-typedef friso_hash_entry * hash_entry_t;
-typedef void (*fhash_callback_fn_t)(hash_entry_t);
-
-typedef struct {
-    uint_t length;
-    uint_t size;
-    float factor;
-    uint_t threshold;
-    hash_entry_t *table;
-} friso_hash_cdt;
-
-typedef friso_hash_cdt * friso_hash_t;
-
-//default value for friso_hash_cdt
-#define DEFAULT_LENGTH     31
-#define DEFAULT_FACTOR    0.85f
-
-/*
- * Function: new_hash_table
- * Usage: table = new_hash_table();
- * --------------------------------
- * this function allocates a new symbol table with no entries.
- */
-FRISO_API friso_hash_t new_hash_table(void);
-
-/*
- * Function: free_hash_table
- * Usage: free_hash_table( table );
- * --------------------------------------
- * this function will free all the allocation for memory.
- */
-FRISO_API void free_hash_table(friso_hash_t, fhash_callback_fn_t);
-
-/*
- * Function: put_new_mapping
- * Usage: put_mapping( table, key, value );
- * ----------------------------------------
- * the function associates the specified key with the given value.
- */
-FRISO_API void *hash_put_mapping(friso_hash_t, fstring, void *);
-
-/*
- * Function: is_mapping_exists
- * Usage: bool = is_mapping_exists( table, key );
- * ----------------------------------------------
- * this function check the given key mapping is exists or not.
- */
-FRISO_API int hash_exist_mapping(friso_hash_t, fstring);
-
-/*
- * Function: get_mapping_value
- * Usage: value = get_mapping_value( table, key );
- * -----------------------------------------------
- * this function return the value associated with the given key.
- *         UNDEFINED will be return if the mapping is not exists.
- */
-FRISO_API void * hash_get_value(friso_hash_t, fstring);
-
-/*
- * Function: remove_mapping
- * Usage: remove_mapping( table, key );
- * ------------------------------------
- * This function is used to remove the mapping associated with the given key.
- */
-FRISO_API hash_entry_t hash_remove_mapping(friso_hash_t, fstring);
-
-/*
- * Function: get_table_size
- * Usage: size = get_table_size( table );
- * --------------------------------------
- * This function is used to count the size of the specified table.
- */
-//FRISO_API uint_t hash_get_size( friso_hash_t );
-#define hash_get_size( hash ) hash->size
-/* }}} hashtable interface define :: end*/
-
-
-
-
-/* {{{ utf8 string interface define :: start*/
-
-/*
- * Function: get_utf8_bytes
- *
- * */
-FRISO_API int get_utf8_bytes(char);
-
-
-/*
- * Function: get_utf8_unicode
- *
- * */
-FRISO_API int get_utf8_unicode(const fstring);
-
-
-/*
- * Function: unicode_to_utf8
- *
- * */
-FRISO_API int unicode_to_utf8(uint_t, fstring);
-
-
-/* }}} utf8 string interface define :: start*/
-
-
-#endif /*end ifndef*/
--- a/libfriso/friso/src/friso_GBK.c
+++ b/libfriso/friso/src/friso_GBK.c
@ -1,283 +0,0 @@
-/**
- * Friso GBK serial functions implementation source file.
- * @package src/friso_GBK.c .
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "friso_API.h"
-#include "friso_ctype.h"
-
-/* read the next GBK word from the specified position.
- *
- * @return int    the bytes of the current readed word.
- */
-FRISO_API int gbk_next_word(
-    friso_task_t task,
-    uint_t *idx,
-    fstring __word) {
-    int c;
-    if(*idx >= task->length) return 0;
-
-    c = (uchar_t)task->text[*idx];
-    if(c <= 0x80) {
-        task->bytes = 1;
-    } else {
-        task->bytes = 2;
-    }
-
-    //copy the word to the buffer.
-    memcpy(__word, task->text + (*idx), task->bytes);
-    (*idx) += task->bytes;
-    __word[task->bytes] = '\0';
-
-    return task->bytes;
-}
-
-//get the bytes of a gbk char.
-//FRISO_API int get_gbk_bytes( char c )
-//{
-//    return 1;
-//}
-
-//check if the given buffer is a gbk word (ANSII string).
-//    included the simplified and traditional words.
-FRISO_API int gbk_cn_string(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    //GBK/2: gb2312 chinese word.
-    return (((c1 >= 0xb0 && c1 <= 0xf7)
-             && (c2 >= 0xa1 && c2 <= 0xfe))
-            //GBK/3: extend chinese words.
-            || ((c1 >= 0x81 && c1 <= 0xa0)
-                && ((c2 >= 0x40 && c2 <= 0x7e)
-                    || (c2 >= 0x80 && c2 <= 0xfe)))
-            //GBK/4: extend chinese words.
-            || ((c1 >= 0xaa && c1 <= 0xfe)
-                && ((c2 >= 0x40 && c2 <= 0xfe)
-                    || (c2 >= 0x80 && c2 <= 0xa0))));
-}
-
-/*check if the given char is a ASCII letter
- *     include all the arabic number, letters and english puntuations.*/
-FRISO_API int gbk_halfwidth_en_char(char c) {
-    int u = (uchar_t) c;
-    return (u >= 32 && u <= 126);
-}
-
-/*
- * check if the given char is a full-width latain.
- *    include the full-width arabic numeber, letters.
- *        but not the full-width puntuations.
- */
-FRISO_API int gbk_fullwidth_en_char(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    return ((c1 == 0xA3)
-            && ((c2 >= 0xB0 && c2 <= 0xB9)          //arabic numbers.
-                || (c2 >= 0xC1 && c2 <= 0xDA)           //uppercase letters.
-                || (c2 >= 0xE1 && c2 <= 0xFA)));       //lowercase letters.
-}
-
-//check if the given char is a upper case english letter.
-//    included the full-width and half-width letters.
-FRISO_API int gbk_uppercase_letter(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    if(c1 <= 0x80) {    //half-width
-        return (c1 >= 65 && c1 <= 90);
-    } else {            //full-width
-        return (c1 == 0xa3 && (c2 >= 0xc1 && c2 <= 0xda));
-    }
-}
-
-//check if the given char is a lower case char.
-//    included the full-width and half-width letters.
-FRISO_API int gbk_lowercase_letter(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    if(c1 <= 0x80) {    //half-width
-        return (c1 >= 97 && c1 <= 122);
-    } else {           //full-width
-        return (c1 == 0xa3 && (c2 >= 0xe1 && c2 <= 0xfa));
-    }
-}
-
-//check if the given char is a arabic numeric.
-//    included the full-width and half-width arabic numeric.
-FRISO_API int gbk_numeric_letter(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    if(c1 <= 0x80) {    //half-width
-        return (c1 >= 48 && c1 <= 57);
-    } else {            //full-width
-        return ((c1 == 0xa3) && (c2 >= 0xb0 && c2 <= 0xb9));
-    }
-}
-
-/*
- * check if the given fstring is make up with numeric chars.
- *     both full-width,half-width numeric is ok.
- */
-FRISO_API int gbk_numeric_string(char *str) {
-    char *s = str;
-    int c1 = 0;
-    int c2 = 0;
-
-    while(*s != '\0') {
-        c1 = (uchar_t)(*s++);
-        if(c1 <= 0x80) {        //half-width
-            if(c1 < 48 || c2 > 57) return 0;
-        } else {            //full-width
-            if(c1 != 0xa3) return 0;
-            c2 = (uchar_t)(*s++);
-            if(c2 < 0xb0 || c2 > 0xb9) return 0;
-        }
-    }
-
-    return 1;
-}
-
-/*
- * check if the given fstring is a decimal: numeric chars
- *     (full-width or half-width) with exactly one '.', which is
- *     neither the first nor the last char.
- *
- * @param str  NUL-terminated GBK string
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_decimal_string(char *str) {
-    int c1 = 0;
-    int c2 = 0;
-    int len = strlen(str), i, p = 0;
-
-    //an empty string is not a decimal; this also keeps the
-    //str[len - 1] check below from reading str[-1].
-    if(len == 0) return 0;
-
-    //point header check.
-    if(str[0] == '.' || str[len - 1] == '.') return 0;
-
-    for(i = 0; i < len;) {
-        c1 = (uchar_t) str[i++];
-        //count the number of the points.
-        if(c1 == 46) {
-            p++;
-            continue;
-        }
-
-        if(c1 <= 0x80) {    //half-width
-            if(c1 < 48 || c1 > 57) return 0;
-        } else {            //full-width
-            if(c1 != 0xa3) return 0;
-            //at the end of the string this reads the terminator and fails.
-            c2 = (uchar_t) str[i++];
-            if(c2 < 0xb0 || c2 > 0xb9) return 0;
-        }
-    }
-
-    return (p == 1);
-}
-
-/*
- * check if the given char is an english(ASCII) letter.
- *     (full-width and half-width), not the punctuation/arabic of course.
- *
- * @param str  pointer to the char; its first two bytes are read
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_en_letter(char *str) {
-    int c1 = (uchar_t) str[0];
-    int c2 = (uchar_t) str[1];
-    if(c1 <= 0x80) {
-        return ((c1 >= 65 && c1 <= 90)          //uppercase A - Z
-                || (c1 >= 97 && c1 <= 122));        //lowercase a - z
-    }
-
-    //full-width letters share the lead byte 0xa3.
-    return ((c1 == 0xa3)
-            && ((c2 >= 0xc1 && c2 <= 0xda)        //uppercase
-                || (c2 >= 0xe1 && c2 <= 0xfa)));        //lowercase
-}
-
-/*
- * check the given char is a whitespace or not:
- *     the half-width space (32) or the two-byte sequence 0xa3 0xa0.
- *
- * NOTE(review): GB2312/GBK appear to encode the ideographic space as
- *     0xa1 0xa1 - confirm whether that sequence should be accepted here.
- *
- * @param str  pointer to the char; its first two bytes are read
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_whitespace(char *str) {
-    const int lead  = (uchar_t) str[0];
-    const int trail = (uchar_t) str[1];
-
-    if(lead > 0x80) {
-        return (lead == 0xa3 && trail == 0xa0);
-    }
-    return (lead == ' ');
-}
-
-/*
- * check if the given char is a letter number like 'ⅠⅡ':
- *     lead byte 0xa2 with trail byte 0xa1 - 0xb0 or 0xf0 - 0xfe.
- *
- * @param str  pointer to the char; its first two bytes are read
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_letter_number(char *str) {
-    const int lead  = (uchar_t) str[0];
-    const int trail = (uchar_t) str[1];
-
-    if(lead != 0xa2) return 0;
-
-    if(trail >= 0xa1 && trail <= 0xb0) return 1;    //lowercase
-    return (trail >= 0xf0 && trail <= 0xfe);        //uppercase
-}
-
-/*
- * check if the given char is an other number like '①⑩⑽㈩':
- *     lead byte 0xa2 with trail byte 0xc5 - 0xee.
- *
- * @param str  pointer to the char; its first two bytes are read
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_other_number(char *str) {
-    const int lead  = (uchar_t) str[0];
-    const int trail = (uchar_t) str[1];
-
-    if(lead != 0xa2) return 0;
-    return (trail >= 0xc5 && trail <= 0xee);
-}
-
-/*
- * check if the given char is an english punctuation: a printable
- *     ASCII char (33 - 126) that is neither a digit nor a letter.
- *
- * @param c
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_en_punctuation(char c) {
-    const int u = (uchar_t) c;
-
-    if(u <= 32 || u >= 127)   return 0;    //control, space, DEL, non-ASCII
-    if(u >= '0' && u <= '9')  return 0;
-    if(u >= 'A' && u <= 'Z')  return 0;
-    if(u >= 'a' && u <= 'z')  return 0;
-
-    return 1;
-}
-
-/*
- * check the given char is a chinese punctuation.
- *     the decision is made per lead byte, see the cases below.
- *
- * @param str  pointer to the char; its first two bytes are read
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int gbk_cn_punctuation(char *str) {
-    const int lead  = (uchar_t) str[0];
-    const int trail = (uchar_t) str[1];
-
-    switch(lead) {
-    case 0xa3:      //full-width en punctuation.
-        return ((trail >= 0xa1 && trail <= 0xaf)
-                || (trail >= 0xba && trail <= 0xc0)
-                || (trail >= 0xdb && trail <= 0xe0)
-                || (trail >= 0xfb && trail <= 0xfe));
-    case 0xa1:      //chinese punctuation.
-        return ((trail >= 0xa1 && trail <= 0xae)
-                || (trail >= 0xb0 && trail <= 0xbf));
-    case 0xa6:      //A6 area special punctuations.
-        return (trail >= 0xf9 && trail <= 0xfe);
-    case 0xa8:      //A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
-        return (trail >= 0x40 && trail <= 0x47);
-    default:
-        return 0;
-    }
-}
-
-/* {{{
-   '@', '$','%', '^', '&', '-', ':', '.', '/', '\'', '#', '+'
-   */
-//cause it it the same as utf-8, we use utf8's interface instead.
-//@see the friso_ctype.h#gbk_keep_punctuation macro defined.
-
-//static friso_hash_t __keep_punctuations_hash__ = NULL;
-
-/* @Deprecated
- * check the given char is an english keep punctuation.*/
-//FRISO_API int gbk_keep_punctuation( char *str )
-//{
-//    if ( __keep_punctuations_hash__ == NULL ) {
-//    __keep_punctuations_hash__ = new_hash_table();
-//    hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
-//    }
-//    //check the hash.
-//    return hash_exist_mapping( __keep_punctuations_hash__, str );
-//}
-/* }}} */
-
-//check if the given english char is a full-width char or not.
-//FRISO_API int gbk_fullwidth_char( char *str )
-//{
-//    return 1;
-//}
--- a/libfriso/friso/src/friso_UTF8.c
+++ b/libfriso/friso/src/friso_UTF8.c
@ -1,467 +0,0 @@
-/**
- * Friso utf8 serial function implementation source file.
- * @package src/friso_UTF8.c .
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "friso_API.h"
-#include "friso_ctype.h"
-
-/* read the next utf-8 word from the specified position.
- *     the bytes are copied to __word (NUL-terminated), *idx is moved
- *     past them and task->bytes / task->unicode are refreshed.
- *
- * @param task      the task whose text is read
- * @param idx       in/out byte offset into task->text
- * @param __word    output buffer for the word
- * @return int      the bytes of the word that was read,
- *                  0 when *idx is not below task->length.
- */
-FRISO_API int utf8_next_word(
-    friso_task_t task,
-    uint_t *idx,
-    fstring __word) {
-    if(*idx < task->length) {
-        //the byte count comes from the lead byte alone.
-        task->bytes = get_utf8_bytes(task->text[ *idx ]);
-
-        //one block copy rather than a per-byte loop (since 2013-09-04).
-        memcpy(__word, task->text + (*idx), task->bytes);
-        (*idx) += task->bytes;
-        __word[task->bytes] = '\0';
-
-        //the unicode counter was moved here from version 1.6.0
-        task->unicode = get_utf8_unicode(__word);
-
-        return task->bytes;
-    }
-
-    return 0;
-}
-
-/*
- * print a character to stdout in a binary style, highest bit first.
- *     __CHAR_BYTES__ digits are written.
- *
- * @param value  the char to print
- */
-FRISO_API void print_char_binary(char value) {
-    register uint_t done = 0;
-
-    while(done < __CHAR_BYTES__) {
-        //bit 7 is the next digit; the shift then moves the following bit up.
-        printf(((value & 0x80) == 0x80) ? "1" : "0");
-        value <<= 1;
-        done++;
-    }
-}
-
-/*
- * get the bytes of a utf-8 char from its lead byte.
- *     an ascii byte (high bit clear) yields 1; otherwise the number
- *     of leading 1 bits is returned, so 10xxxxxx also yields 1 and
- *     0xFE / 0xFF yield 7 / 8.
- *
- * @param value  the lead byte
- * @return int
- */
-FRISO_API int get_utf8_bytes(char value) {
-    //work on an unsigned copy: where plain char is signed, shifting
-    //a negative value to the left is undefined behavior in C.
-    uchar_t lead = (uchar_t) value;
-    int t = 0;
-
-    //one byte ascii char.
-    if((lead & 0x80) == 0) return 1;
-
-    //count the leading 1 bits.
-    for(; (lead & 0x80) != 0; lead <<= 1) {
-        t++;
-    }
-
-    return t;
-}
-
-/*
- * get the unicode serial of a utf-8 char.
- *     the value is computed arithmetically, so it does not depend on
- *     the byte order of the host.
- *     only 1 - 3 byte chars are decoded, longer ones yield 0.
- *
- * @param  ch  pointer to the first byte of the char
- * @return int.
- */
-FRISO_API int get_utf8_unicode(const fstring ch) {
-    const int b1 = (uchar_t) ch[0];
-    int b2, b3;
-
-    switch(get_utf8_bytes(*ch)) {
-    case 1:
-        return b1;
-    case 2:
-        //110xxxxx 10xxxxxx
-        b2 = (uchar_t) ch[1];
-        return ((b1 & 0x1F) << 6) | (b2 & 0x3F);
-    case 3:
-        //1110xxxx 10xxxxxx 10xxxxxx
-        b2 = (uchar_t) ch[1];
-        b3 = (uchar_t) ch[2];
-        return ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
-    }
-
-    //ignore the ones that are larger than 3 bytes;
-    return 0;
-}
-
-/*
- * turn the unicode serial to a utf-8 string.
- *     the bytes are written to __word without a terminating NUL.
- *
- *     U-00000000 - U-0000007F  0xxxxxxx
- *     U-00000080 - U-000007FF  110xxxxx 10xxxxxx
- *     U-00000800 - U-0000FFFF  1110xxxx 10xxxxxx 10xxxxxx
- *     U-00010000 - U-001FFFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- *     U-00200000 - U-03FFFFFF  111110xx 10xxxxxx (x4)
- *     U-04000000 - U-7FFFFFFF  1111110x 10xxxxxx (x5)
- *
- * @param u       the unicode serial
- * @param __word  output buffer
- * @return int    the bytes written (1 - 6), 0 when u is above 0x7FFFFFFF.
- */
-FRISO_API int unicode_to_utf8(uint_t u, fstring __word) {
-    uint_t prefix, mask;
-    int bytes, i;
-
-    if(u <= 0x0000007F) {
-        *__word = (u & 0x7F);
-        return 1;
-    }
-
-    //pick the length with the lead byte prefix and payload mask.
-    if(u <= 0x000007FF) {
-        bytes = 2; prefix = 0xC0; mask = 0x1F;
-    } else if(u <= 0x0000FFFF) {
-        bytes = 3; prefix = 0xE0; mask = 0x0F;
-    } else if(u <= 0x001FFFFF) {
-        bytes = 4; prefix = 0xF0; mask = 0x07;
-    } else if(u <= 0x03FFFFFF) {
-        bytes = 5; prefix = 0xF8; mask = 0x03;
-    } else if(u <= 0x7FFFFFFF) {
-        bytes = 6; prefix = 0xFC; mask = 0x01;
-    } else {
-        return 0;
-    }
-
-    //continuation bytes, last one first, 6 payload bits each.
-    for(i = bytes - 1; i > 0; i--) {
-        *(__word + i) = (u & 0x3F) | 0x80;
-        u >>= 6;
-    }
-    //what is left of u is the payload of the lead byte.
-    *__word = (u & mask) | prefix;
-
-    return bytes;
-}
-
-/*
- * check the given char is a CJK char or not.
- *     2E80-2EFF CJK Radicals Supplement
- *     2F00-2FDF Kangxi Radicals
- *     3000-303F CJK Symbols and Punctuation      --ignore
- *     31C0-31EF CJK Strokes
- *     3200-32FF Enclosed CJK Letters and Months  --ignore.
- *     3300-33FF CJK Compatibility
- *     3400-4DBF CJK Unified Ideographs Extension A (not tested below)
- *     4DC0-4DFF Yijing Hexagram Symbols
- *     4E00-9FBF CJK Unified Ideographs
- *     F900-FAFF CJK Compatibility Ideographs
- *     FE30-FE4F CJK Compatibility Forms
- *     FF00-FFEF full-width ASCII and punctuation --ignore (as basic latin)
- *
- * Japanese:
- *     3040-309F Hiragana
- *     30A0-30FF Katakana
- *     31F0-31FF Katakana Phonetic Extensions
- *
- * Korean:
- *     AC00-D7AF Hangul Syllables
- *     1100-11FF Hangul Jamo
- *     3130-318F Hangul Compatibility Jamo
- *
- * @param u : the unicode serial of the char
- * @return int : 1 for yes and 0 for not.
- */
-
-//Comment one of the following macro define
-//to clear the check of the specified language.
-#define FRISO_CJK_CHK_C
-//#define FRISO_CJK_CHK_J
-//#define FRISO_CJK_CHK_K
-FRISO_API int utf8_cjk_string(uint_t u) {
-    //Chinese.
-#ifdef FRISO_CJK_CHK_C
-    if((u >= 0x4E00 && u <= 0x9FBF)
-            || (u >= 0x2E80 && u <= 0x2EFF)
-            || (u >= 0x2F00 && u <= 0x2FDF)
-            || (u >= 0x31C0 && u <= 0x31EF)
-            //|| ( u >= 0x3200 && u <= 0x32FF )
-            || (u >= 0x3300 && u <= 0x33FF)
-            //|| ( u >= 0x3400 && u <= 0x4DBF )
-            || (u >= 0x4DC0 && u <= 0x4DFF)
-            || (u >= 0xF900 && u <= 0xFAFF)
-            || (u >= 0xFE30 && u <= 0xFE4F)) {
-        return 1;
-    }
-#endif
-
-    //Japanese.
-#ifdef FRISO_CJK_CHK_J
-    if((u >= 0x3040 && u <= 0x309F)
-            || (u >= 0x30A0 && u <= 0x30FF)
-            || (u >= 0x31F0 && u <= 0x31FF)) {
-        return 1;
-    }
-#endif
-
-    //Korean
-#ifdef FRISO_CJK_CHK_K
-    if((u >= 0xAC00 && u <= 0xD7AF)
-            || (u >= 0x1100 && u <= 0x11FF)
-            || (u >= 0x3130 && u <= 0x318F)) {
-        return 1;
-    }
-#endif
-
-    return 0;
-}
-
-/*
- * check the given char is a Basic Latin char or not:
- *    any unicode from the space (32) up to '~' (126),
- *    letters, numerics and english punctuations alike.
- *
- * @param u
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_halfwidth_en_char(uint_t u) {
-    if(u < ' ') return 0;
-    return (u <= '~');
-}
-
-/*
- * check the given char is a full-width latin or not.
- *    include the full-width arabic numbers and letters,
- *    but not the full-width punctuations.
- *
- * @param u
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_fullwidth_en_char(uint_t u) {
-    if(u >= 0xFF10 && u <= 0xFF19) return 1;    //arabic number (65296 - 65305)
-    if(u >= 0xFF21 && u <= 0xFF3A) return 1;    //upper case letters (65313 - 65338)
-    return (u >= 0xFF41 && u <= 0xFF5A);        //lower case letters (65345 - 65370)
-}
-
-//check the given char is an upper case letter or not.
-//    included the full-width and half-width letters:
-//    anything above 0xFF00 (65280) is shifted down by 0xFEE0 (65248) first.
-FRISO_API int utf8_uppercase_letter(uint_t u) {
-    const uint_t half = (u > 0xFF00) ? (u - 0xFEE0) : u;
-    return (half >= 'A' && half <= 'Z');
-}
-
-//check the given char is a lower case letter or not.
-//    included the full-width and half-width letters:
-//    anything above 0xFF00 (65280) is shifted down by 0xFEE0 (65248) first.
-FRISO_API int utf8_lowercase_letter(uint_t u) {
-    const uint_t half = (u > 0xFF00) ? (u - 0xFEE0) : u;
-    return (half >= 'a' && half <= 'z');
-}
-
-//check the given char is a numeric
-//    included the full-width and half-width arabic numeric:
-//    anything above 0xFF00 (65280) is shifted down by 0xFEE0 (65248) first.
-FRISO_API int utf8_numeric_letter(uint_t u) {
-    const uint_t half = (u > 0xFF00) ? (u - 0xFEE0) : u;
-    return (half >= '0' && half <= '9');
-}
-
-//check the given char is an english letter.(included the full-width)
-//    not the punctuation of course.
-//    anything above 0xFF00 (65280) is shifted down by 0xFEE0 (65248) first.
-FRISO_API int utf8_en_letter(uint_t u) {
-    const uint_t half = (u > 0xFF00) ? (u - 0xFEE0) : u;
-
-    if(half >= 'A' && half <= 'Z') return 1;
-    return (half >= 'a' && half <= 'z');
-}
-
-/*
- * check if the given fstring is made up of numerics only.
- *    both full-width and half-width numerics are ok;
- *    an empty string yields 1.
- *
- * @param str  NUL-terminated utf-8 string
- * @return int 1 for yes and 0 for not.
- *
- * full-width numerics: 65296 (０) - 65305 (９)
- */
-FRISO_API int utf8_numeric_string(const fstring str) {
-    fstring s = str;
-    int bytes, u;
-
-    while(*s != '\0') {
-        bytes = 1;
-        //test the high bit through uchar_t: a "*s < 0" check only
-        //sees a multi-byte char where plain char is signed.
-        if(((uchar_t) *s) >= 0x80) {    //full-width chars.
-            u = get_utf8_unicode(s);
-            bytes = get_utf8_bytes(*s);
-            if(u < 65296 || u > 65305) return 0;
-        } else if(*s < 48 || *s > 57) {
-            return 0;
-        }
-
-        s += bytes;
-    }
-
-    return 1;
-}
-
-/*
- * check if the given fstring is a decimal: numerics (full-width
- *    or half-width) with exactly one '.', which is neither the
- *    first nor the last char.
- *
- * @param str  NUL-terminated utf-8 string
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_decimal_string(const fstring str) {
-    int len = strlen(str), i, p = 0;
-    int bytes, u;
-
-    //an empty string is not a decimal; this also keeps the
-    //str[len - 1] check below from reading str[-1].
-    if(len == 0) return 0;
-
-    if(str[0] == '.' || str[len - 1] == '.') return 0;
-
-    //scan from the first byte so that the first char is validated too
-    //and a leading full-width numeric is not entered in its middle.
-    for(i = 0; i < len; i += bytes) {
-        bytes = 1;
-        if(str[i] == '.') {
-            //count the number of char '.'
-            p++;
-        } else if(((uchar_t) str[i]) >= 0x80) {
-            //full-width numeric. the high bit is tested through uchar_t
-            //because plain char is not signed on every platform.
-            u = get_utf8_unicode(str + i);
-            bytes = get_utf8_bytes(str[i]);
-            if(u < 65296 || u > 65305) return 0;
-        } else if(str[i] < 48 || str[i] > 57) {
-            return 0;
-        }
-    }
-
-    return (p == 1);
-}
-
-/*
- * check the given char is a whitespace or not:
- *    the half-width space (32) or unicode 12288 (0x3000).
- *
- * @param u
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_whitespace(uint_t u) {
-    return (u == ' ' || u == 0x3000) ? 1 : 0;
-}
-
-
-/*
- * check the given char is an english punctuation.
- *    full-width chars are not folded to half-width here.
- *
- * @param u
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_en_punctuation(uint_t u) {
-    if(u >= 33 && u <= 47)   return 1;      // ! - /
-    if(u >= 58 && u <= 64)   return 1;      // : - @
-    if(u >= 91 && u <= 96)   return 1;      // [ - `   (added @2013-08-31)
-    return (u >= 123 && u <= 126);          // { - ~
-}
-
-/*
- * check the given char is a chinese punctuation.
- * @date    2013-08-31 added.
- *
- * @param u
- * @return int 1 for yes and 0 for not.
- */
-FRISO_API int utf8_cn_punctuation(uint_t u) {
-    //the gaps around the full-width numerics and letters.
-    if(u >= 0xFF01 && u <= 0xFF0F) return 1;    //65281 - 65295
-    if(u >= 0xFF1A && u <= 0xFF1F) return 1;    //65306 - 65311
-    if(u >= 0xFF3B && u <= 0xFF40) return 1;    //65339 - 65344
-    if(u >= 0xFF5B && u <= 0xFF65) return 1;    //65371 - 65381
-
-    //cjk symbol and punctuation.(added 2013-09-06)
-    //from http://www.unicode.org/charts/PDF/U3000.pdf
-    return (u >= 0x3001 && u <= 0x301F);        //12289 - 12319
-}
-
-/*
- * check if the given char is a letter number in unicode.
- *        like 'ⅠⅡ'.
- * the check is a stub for now: the argument is not inspected.
- *
- * @param u
- * @return int always 0.
- */
-FRISO_API int utf8_letter_number(uint_t u) {
-    (void) u;
-    return 0;
-}
-
-/*
- * check if the given char is an other number in unicode.
- *        like '①⑩⑽㈩'.
- * the check is a stub for now: the argument is not inspected.
- *
- * @param u
- * @return int always 0.
- */
-FRISO_API int utf8_other_number(uint_t u) {
-    (void) u;
-    return 0;
-}
-
-//A macro define has replace this.
-//FRISO_API int is_en_punctuation( char c )
-//{
-//    return utf8_en_punctuation( (uint_t) c );
-//}
-
-/* {{{
-   '@', '$','%', '^', '&', '-', ':', '.', '/', '\'', '#', '+'
-   */
-//static friso_hash_t __keep_punctuations_hash__ = NULL;
-
-/* @Deprecated
- * check the given char is an english keep punctuation.*/
-//FRISO_API int utf8_keep_punctuation( fstring str )
-//{
-//    if ( __keep_punctuations_hash__ == NULL )
-//    {
-//    __keep_punctuations_hash__ = new_hash_table();
-//    hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
-//    //hash_put_mapping( __keep_punctuations_hash__, "$", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "%", NULL );
-//    //hash_put_mapping( __keep_punctuations_hash__, "^", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "&", NULL );
-//    //hash_put_mapping( __keep_punctuations_hash__, "-", NULL );
-//    //hash_put_mapping( __keep_punctuations_hash__, ":", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, ".", NULL );
-//    //hash_put_mapping( __keep_punctuations_hash__, "/", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "'", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "#", NULL );
-//    hash_put_mapping( __keep_punctuations_hash__, "+", NULL );
-//    }
-//    //check the hash.
-//    return hash_exist_mapping( __keep_punctuations_hash__, str );
-//}
-/* }}} */
-
-/*
- * check the given english char is a full-width char or not.
- *
- * @param ch
- * @return 1 for yes and 0 for not.
- */
-//FRISO_API int utf8_fullwidth_char( uint_t u )
-//{
-//    if ( u == 12288 )
-//    return 1;                    //full-width space
-//    //(32 - 126) ascii code
-//    return (u > 65280 && u <= 65406);
-//}
--- a/libfriso/friso/src/friso_array.c
+++ b/libfriso/friso/src/friso_array.c
@ -1,209 +0,0 @@
-/*
- * friso dynamaic Array interface implementation defined in header file "friso_API.h".
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdlib.h>
-
-/* ********************************************
- * friso array list static functions block    *
- **********************************************/
-//allocate __blocks item slots, each one set to NULL.
-//    a failed allocation runs ___ALLOCATION_ERROR___.
-__STATIC_API__ void **create_array_entries(uint_t __blocks) {
-    void **entries = (void **) FRISO_CALLOC(sizeof(void *), __blocks);
-    register uint_t i = 0;
-
-    if(entries == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //initialize
-    while(i < __blocks) {
-        entries[i++] = NULL;
-    }
-
-    return entries;
-}
-
-//resize the array. (the opacity should not be smaller than array->length)
-//    the items move to a fresh block of opacity slots and the
-//    old block is released.
-__STATIC_API__ friso_array_t resize_array_list(
-    friso_array_t array,
-    uint_t opacity) {
-    void **fresh = create_array_entries(opacity);
-    register uint_t i = 0;
-
-    //carry the stored items over.
-    while(i < array->length) {
-        fresh[i] = array->items[i];
-        i++;
-    }
-
-    FRISO_FREE(array->items);
-    array->allocs = opacity;
-    array->items = fresh;
-
-    return array;
-}
-
-
-/* ********************************************
- * friso array list FRISO_API functions block    *
- **********************************************/
-//create a new array list. (A macro define has replace this.)
-//FRISO_API friso_array_t new_array_list( void ) {
-//    return new_array_list_with_opacity( __DEFAULT_ARRAY_LIST_OPACITY__ );
-//}
-
-//create a new, empty array list with room for opacity items.
-//    a failed allocation runs ___ALLOCATION_ERROR___.
-FRISO_API friso_array_t new_array_list_with_opacity(uint_t opacity) {
-    friso_array_t list = (friso_array_t)
-                         FRISO_MALLOC(sizeof(friso_array_entry));
-    if(list == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //initialize
-    list->length = 0;
-    list->allocs = opacity;
-    list->items  = create_array_entries(opacity);
-
-    return list;
-}
-
-/*
- * free the given friso array.
- *    the slot block and the array itself are released,
- *    but never what the stored items point to.
- */
-FRISO_API void free_array_list(friso_array_t array) {
-    //the slots go first: they are reached through the array.
-    FRISO_FREE(array->items);
-    FRISO_FREE(array);
-}
-
-//add a new item to the end of the array.
-//    a full array grows to length * 2 + 1 slots first.
-FRISO_API void array_list_add(friso_array_t array, void *value) {
-    if(array->length == array->allocs) {
-        resize_array_list(array, array->length * 2 + 1);
-    }
-
-    array->items[array->length] = value;
-    array->length++;
-}
-
-//insert a new item at a specified position.
-//    the items from idx on move one slot to the right.
-//    an idx above array->length is ignored.
-FRISO_API void array_list_insert(
-    friso_array_t array,
-    uint_t idx,
-    void *value) {
-    register uint_t t;
-
-    if(idx > array->length) return;
-
-    //check the condition to resize the array.
-    if(array->length == array->allocs) {
-        resize_array_list(array, array->length * 2 + 1);
-    }
-
-    //move the elements after idx, last one first.
-    //t counts down to idx + 1 only: an unsigned "t >= idx" test can
-    //never fail for idx 0 and wraps around on an empty array.
-    for(t = array->length; t > idx; t--) {
-        array->items[t] = array->items[t - 1];
-    }
-
-    array->items[idx] = value;
-    array->length++;
-}
-
-//get the item at a specified position.
-//    NULL is returned for an idx outside the stored items.
-FRISO_API void *array_list_get(friso_array_t array, uint_t idx) {
-    return (idx < array->length) ? array->items[idx] : NULL;
-}
-
-//set the value of the item at a specified position.
-//this will return the old value, or NULL (and store nothing)
-//    for an idx outside the stored items.
-FRISO_API void * array_list_set(
-    friso_array_t array,
-    uint_t idx,
-    void * value) {
-    void * previous;
-
-    if(idx >= array->length) return NULL;
-
-    previous = array->items[idx];
-    array->items[idx] = value;
-    return previous;
-}
-
-//remove the item at a specified position.
-//this will return the value of the removed item, or NULL
-//    (and remove nothing) for an idx outside the stored items.
-FRISO_API void * array_list_remove(
-    friso_array_t array, uint_t idx) {
-    register uint_t t;
-    void *removed;
-
-    if(idx >= array->length) return NULL;
-
-    removed = array->items[idx];
-
-    //close the gap: every later item moves one slot to the left.
-    for(t = idx; t + 1 < array->length; t++) {
-        array->items[t] = array->items[ t + 1 ];
-    }
-
-    //the last slot is now a duplicate, clear it.
-    array->items[array->length - 1] = NULL;
-    array->length--;
-
-    return removed;
-}
-
-/*trim the array list: shrink the slot block to array->length
- * when there are unused slots, otherwise leave it alone.*/
-FRISO_API friso_array_t array_list_trim(friso_array_t array) {
-    if(array->length >= array->allocs) {
-        return array;
-    }
-
-    return resize_array_list(array, array->length);
-}
-
-/*
- * clear the array list.
- *     every used slot is set to NULL and the length is reset to 0.
- *     what the items pointed to is not freed here, and the slot
- *        block keeps its allocation.
- */
-FRISO_API friso_array_t array_list_clear(friso_array_t array) {
-    register uint_t i = 0;
-
-    //drop the references, the pointed allocations stay untouched.
-    while(i < array->length) {
-        array->items[i++] = NULL;
-    }
-
-    //attribute reset.
-    array->length = 0;
-
-    return array;
-}
-
-//get the size of the array list. (A macro define has replace this.)
-//FRISO_API uint_t array_list_size( friso_array_t array ) {
-//    return array->length;
-//}
-
-//return the allocations of the array list.(A macro define has replace this)
-//FRISO_API uint_t array_list_allocs( friso_array_t array ) {
-//    return array->allocs;
-//}
-
-//check if the array is empty.(A macro define has replace this.)
-//FRISO_API int array_list_empty( friso_array_t array )
-//{
-//    return ( array->length == 0 );
-//}
--- a/libfriso/friso/src/friso_ctype.c
+++ b/libfriso/friso/src/friso_ctype.c
@ -1,244 +0,0 @@
-/**
- * friso string type check functions,
- * like english/CJK, full-wdith/half-width, punctuation or not.
- * @see friso_UTF8.c and friso_GBK.c for detail.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "friso_ctype.h"
-#include "friso_API.h"
-
-/* check if the specified string is a cn string.
- *     UTF-8 tests task->unicode, GBK tests task->buffer.
- *
- * @return int (true for cn string or false; 0 for any other charset)
- * */
-FRISO_API int friso_cn_string(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_cjk_string(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_cn_string(task->buffer);
-
-    return 0;
-}
-
-//check if the specified word is a whitespace.
-//    UTF-8 tests task->unicode, GBK tests task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_whitespace(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_whitespace(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_whitespace(task->buffer);
-
-    return 0;
-}
-
-//check if the char at task->idx in task->text is a numeric letter.
-//    UTF-8 tests the single byte at that position, GBK the bytes from there;
-//    any other charset yields 0.
-FRISO_API int friso_numeric_letter(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) {
-        return utf8_numeric_letter((uint_t) task->text[task->idx]);
-    }
-    if(charset == FRISO_GBK) {
-        return gbk_numeric_letter(task->text + task->idx);
-    }
-
-    return 0;
-}
-
-//check if the char at task->idx in task->text is an english letter.
-//    UTF-8 tests the single byte at that position, GBK the bytes from there;
-//    any other charset yields 0.
-FRISO_API int friso_en_letter(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) {
-        return utf8_en_letter((uint_t) task->text[task->idx]);
-    }
-    if(charset == FRISO_GBK) {
-        return gbk_en_letter(task->text + task->idx);
-    }
-
-    return 0;
-}
-
-//check if the specified word is a half-width letter.
-//    punctuations are included.
-//    UTF-8 tests task->unicode, GBK the first byte of task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_halfwidth_en_char(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_halfwidth_en_char(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_halfwidth_en_char(task->buffer[0]);
-
-    return 0;
-}
-
-//check if the specified word is a full-width letter.
-//    full-width punctuations are not included.
-//    UTF-8 tests task->unicode, GBK tests task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_fullwidth_en_char(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_fullwidth_en_char(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_fullwidth_en_char(task->buffer);
-
-    return 0;
-}
-
-//check if the specified word is an english punctuation.
-//    UTF-8 tests task->unicode, GBK the first byte of task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_en_punctuation(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_en_punctuation(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_en_punctuation(task->buffer[0]);
-
-    return 0;
-}
-
-//check if the specified word is a chinese punctuation.
-//    UTF-8 tests task->unicode, GBK tests task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_cn_punctuation(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_cn_punctuation(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_cn_punctuation(task->buffer);
-
-    return 0;
-}
-
-//letter number check ('ⅠⅡ' and the like).
-//    a stub for now: neither argument is inspected, 0 is always returned.
-FRISO_API int friso_letter_number(
-    friso_charset_t charset,
-    friso_task_t task) {
-    (void) charset;
-    (void) task;
-    return 0;
-}
-
-//other number check ('①⑩⑽㈩' and the like).
-//    a stub for now: neither argument is inspected, 0 is always returned.
-FRISO_API int friso_other_number(
-    friso_charset_t charset,
-    friso_task_t task) {
-    (void) charset;
-    (void) task;
-    return 0;
-}
-
-//check if the word is a keep punctuation.
-//@Deprecated
-//FRISO_API int friso_keep_punctuation(
-//    friso_charset_t charset,
-//    friso_task_t task )
-//{
-//    if ( charset == FRISO_UTF8 )
-//    return utf8_keep_punctuation( task->buffer );
-//    else if ( charset == FRISO_GBK )
-//    return gbk_keep_punctuation( task->buffer );
-//    return 0;
-//}
-
-//check if the specified char is an english punctuation.
-//    the same test as friso_en_punctuation, for a plain char
-//    instead of a task; any other charset yields 0.
-FRISO_API int is_en_punctuation(
-    friso_charset_t charset, char c) {
-    if(charset == FRISO_UTF8) return utf8_en_punctuation((uint_t) c);
-    if(charset == FRISO_GBK)  return gbk_en_punctuation(c);
-
-    return 0;
-}
-
-//check the specified string is made up of numerics.
-//    the work is done by the checker of the given charset;
-//    any other charset yields 0.
-FRISO_API int friso_numeric_string(
-    friso_charset_t charset,
-    char *buffer) {
-    if(charset == FRISO_UTF8) return utf8_numeric_string(buffer);
-    if(charset == FRISO_GBK)  return gbk_numeric_string(buffer);
-
-    return 0;
-}
-
-//check the specified string is a decimal string.
-//    the work is done by the checker of the given charset;
-//    any other charset yields 0.
-FRISO_API int friso_decimal_string(
-    friso_charset_t charset, char *buffer) {
-    if(charset == FRISO_UTF8) return utf8_decimal_string(buffer);
-    if(charset == FRISO_GBK)  return gbk_decimal_string(buffer);
-
-    return 0;
-}
-
-//check if the specified char is an english uppercase letter.
-//    included full-width and half-width letters.
-//    UTF-8 tests task->unicode, GBK tests task->buffer;
-//    any other charset yields 0.
-FRISO_API int friso_uppercase_letter(
-    friso_charset_t charset,
-    friso_task_t task) {
-    if(charset == FRISO_UTF8) return utf8_uppercase_letter(task->unicode);
-    if(charset == FRISO_GBK)  return gbk_uppercase_letter(task->buffer);
-
-    return 0;
-}
-
-/* get the type of the current char of the task.
- *     UTF-8 classifies task->unicode, GBK the first byte of task->buffer;
- *     any other charset is classified as code 0.
- *     codes outside 32 - 126 (full-width chars included) are
- *     FRISO_EN_UNKNOW.
- */
-FRISO_API friso_enchar_t friso_enchar_type(
-    friso_charset_t charset,
-    friso_task_t task) {
-    //Unicode or ASCII.(Both UTF-8 and GBK are valid)
-    uint_t code = 0;
-
-    if(charset == FRISO_UTF8) {
-        code = task->unicode;
-    } else if(charset == FRISO_GBK) {
-        code = (uchar_t)task->buffer[0];
-    }
-
-    //range check.
-    if(code < 32 || code > 126) {
-        return FRISO_EN_UNKNOW;
-    }
-    if(code == ' ') {
-        return FRISO_EN_WHITESPACE;
-    }
-    if(code >= '0' && code <= '9') {
-        return FRISO_EN_NUMERIC;
-    }
-    if((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z')) {
-        return FRISO_EN_LETTER;
-    }
-
-    //what is left of 33 - 126.
-    return FRISO_EN_PUNCTUATION;
-}
-
-/* get the type of the specified en char.
- *     (the char should be half-width english char only)
- *     bytes outside 32 - 126 are FRISO_EN_UNKNOW.
- */
-FRISO_API friso_enchar_t get_enchar_type(char ch) {
-    const uint_t code = (uchar_t) ch;
-
-    //range check.
-    if(code < 32 || code > 126) {
-        return FRISO_EN_UNKNOW;
-    }
-    if(code == ' ') {
-        return FRISO_EN_WHITESPACE;
-    }
-    if(code >= '0' && code <= '9') {
-        return FRISO_EN_NUMERIC;
-    }
-    if((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z')) {
-        return FRISO_EN_LETTER;
-    }
-
-    //what is left of 33 - 126.
-    return FRISO_EN_PUNCTUATION;
-}
--- a/libfriso/friso/src/friso_ctype.h
+++ b/libfriso/friso/src/friso_ctype.h
@ -1,261 +0,0 @@
-/**
- * Friso charset about function interface header file.
- *     @package src/friso_charset.h .
- * Available charset for now:
- * 1. UTF8  - function start with utf8
- * 2. GBK   - function start with gbk
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#ifndef _friso_charset_h
-#define _friso_charset_h
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "friso.h"
-#include "friso_API.h"
-
-/** {{{ wrap interface */
-/* check if the specified string is a cn string.
- *
- * @return int (true for cn string or false)
- * */
-FRISO_API int friso_cn_string(friso_charset_t, friso_task_t);
-
-//check if the specified word is a whitespace.
-FRISO_API int friso_whitespace(friso_charset_t, friso_task_t);
-
-//check if the specifiled word is a numeric letter.
-FRISO_API int friso_numeric_letter(friso_charset_t, friso_task_t);
-
-//check if the specified word is a english letter.
-FRISO_API int friso_en_letter(friso_charset_t, friso_task_t);
-
-//check if the specified word is a half-width letter.
-//    punctuations are inclued.
-FRISO_API int friso_halfwidth_en_char(friso_charset_t, friso_task_t);
-
-//check if the specified word is a full-width letter.
-//    full-width punctuations are not included.
-FRISO_API int friso_fullwidth_en_char(friso_charset_t, friso_task_t);
-
-//check if the specified word is an english punctuations.
-FRISO_API int friso_en_punctuation(friso_charset_t, friso_task_t);
-
-//check if the specified word ia sn chinese punctuation.
-FRISO_API int friso_cn_punctuation(friso_charset_t, friso_task_t);
-
-FRISO_API int friso_letter_number(friso_charset_t, friso_task_t);
-FRISO_API int friso_other_number(friso_charset_t, friso_task_t);
-
-//check if the word is a keep punctuation.
-//@Deprecated
-//FRISO_API int friso_keep_punctuation( friso_charset_t, friso_task_t );
-
-//check the specified string is numeric string.
-FRISO_API int friso_numeric_string(friso_charset_t, char *);
-
-//check the specified string is a decimal string.
-FRISO_API int friso_decimal_string(friso_charset_t, char *);
-
-//check if the specified char is english uppercase letter.
-//    included full-width and half-width letters.
-FRISO_API int friso_uppercase_letter(friso_charset_t, friso_task_t);
-
-
-//en char type.
-//#define FRISO_EN_LETTER     0     //a-z && A-Z
-//#define FRISO_EN_NUMERIC    1    //0-9
-//#define FRISO_EN_PUNCTUATION    2    //english punctuations
-//#define FRISO_EN_WHITESPACE    3    //whitespace
-//#define FRISO_EN_UNKNOW        -1    //beyond 32-122
-typedef enum {
-    FRISO_EN_LETTER        = 0,    //A-Z, a-z
-    FRISO_EN_NUMERIC        = 1,    //0-9
-    FRISO_EN_PUNCTUATION    = 2,    //english punctuations
-    FRISO_EN_WHITESPACE        = 3,    //whitespace
-    FRISO_EN_UNKNOW        = -1    //unkow(beyond 32-126)
-} friso_enchar_t;
-
-/* get the type of the specified char.
- *     the type will be the constants defined above.
- * (include the fullwidth english char.)
- */
-FRISO_API friso_enchar_t friso_enchar_type(friso_charset_t, friso_task_t);
-
-/* get the type of the specified en char.
- *     the type will be the constants defined above.
- * (the char should be half-width english char only)
- */
-FRISO_API friso_enchar_t get_enchar_type(char);
-
-/* }}} */
-
-
-
-
-/** {{{ UTF8 interface*/
-
-/* read the next utf-8 word from the specified position.
- *
- * @return int    the bytes of the current readed word.
- */
-FRISO_API int utf8_next_word(friso_task_t, uint_t *, fstring);
-
-//get the bytes of a utf-8 char.
-FRISO_API int get_utf8_bytes(char);
-
-//return the unicode serial number of a given string.
-FRISO_API int get_utf8_unicode(const fstring);
-
-//convert the unicode serial to a utf-8 string.
-FRISO_API int unicode_to_utf8(uint_t, fstring);
-
-//check if the given char is a CJK.
-FRISO_API int utf8_cjk_string(uint_t) ;
-
-/*check the given char is a Basic Latin letter or not.
- *         include all the letters and english puntuations.*/
-FRISO_API int utf8_halfwidth_en_char(uint_t);
-
-/*
- * check the given char is a full-width latain or not.
- *    include the full-width arabic numeber, letters.
- *        but not the full-width puntuations.
- */
-FRISO_API int utf8_fullwidth_en_char(uint_t);
-
-//check the given char is a upper case letter or not.
-//    included all the full-width and half-width letters.
-FRISO_API int utf8_uppercase_letter(uint_t);
-
-//check the given char is a lower case letter or not.
-//    included all the full-width and half-width letters.
-FRISO_API int utf8_lowercase_letter(uint_t);
-
-//check the given char is a numeric.
-//    included the full-width and half-width arabic numeric.
-FRISO_API int utf8_numeric_letter(uint_t);
-
-/*
- * check if the given fstring is make up with numeric chars.
- *     both full-width,half-width numeric is ok.
- */
-FRISO_API int utf8_numeric_string(char *);
-
-FRISO_API int utf8_decimal_string(char *);
-
-//check the given char is a english char.
-//(full-width and half-width)
-//not the punctuation of course.
-FRISO_API int utf8_en_letter(uint_t);
-
-//check the given char is a whitespace or not.
-FRISO_API int utf8_whitespace(uint_t);
-
-/* check if the given char is a letter number like 'ⅠⅡ'
- */
-FRISO_API int utf8_letter_number(uint_t);
-
-/*
- * check if the given char is a other number like '①⑩⑽㈩'
- */
-FRISO_API int utf8_other_number(uint_t);
-
-//check if the given char is a english punctuation.
-FRISO_API int utf8_en_punctuation(uint_t) ;
-
-//check if the given char is a chinese punctuation.
-FRISO_API int utf8_cn_punctuation(uint_t u);
-
-FRISO_API int is_en_punctuation(friso_charset_t, char);
-//#define is_en_punctuation( c ) utf8_en_punctuation((uint_t) c)
-
-//@Deprecated
-//FRISO_API int utf8_keep_punctuation( fstring );
-/* }}} */
-
-
-
-
-/** {{{ GBK interface */
-
-/* read the next GBK word from the specified position.
- *
- * @return int    the bytes of the current readed word.
- */
-FRISO_API int gbk_next_word(friso_task_t, uint_t *, fstring);
-
-//get the bytes of a utf-8 char.
-FRISO_API int get_gbk_bytes(char);
-
-//check if the given char is a gbk char (ANSII string).
-FRISO_API int gbk_cn_string(char *) ;
-
-/*check if the given char is a ASCII letter
- *     include all the letters and english puntuations.*/
-FRISO_API int gbk_halfwidth_en_char(char);
-
-/*
- * check if the given char is a full-width latain.
- *    include the full-width arabic numeber, letters.
- *        but not the full-width puntuations.
- */
-FRISO_API int gbk_fullwidth_en_char(char *);
-
-//check if the given char is a upper case char.
-//    included all the full-width and half-width letters.
-FRISO_API int gbk_uppercase_letter(char *);
-
-//check if the given char is a lower case char.
-//    included all the full-width and half-width letters.
-FRISO_API int gbk_lowercase_letter(char *);
-
-//check if the given char is a numeric.
-//    included the full-width and half-width arabic numeric.
-FRISO_API int gbk_numeric_letter(char *);
-
-/*
- * check if the given fstring is make up with numeric chars.
- *     both full-width,half-width numeric is ok.
- */
-FRISO_API int gbk_numeric_string(char *);
-
-FRISO_API int gbk_decimal_string(char *);
-
-//check if the given char is a english(ASCII) char.
-//(full-width and half-width)
-//not the punctuation of course.
-FRISO_API int gbk_en_letter(char *);
-
-//check the specified char is a whitespace or not.
-FRISO_API int gbk_whitespace(char *);
-
-/* check if the given char is a letter number like 'ⅠⅡ'
- */
-FRISO_API int gbk_letter_number(char *);
-
-/*
- * check if the given char is a other number like '①⑩⑽㈩'
- */
-FRISO_API int gbk_other_number(char *);
-
-//check if the given char is a english punctuation.
-FRISO_API int gbk_en_punctuation(char) ;
-
-//check the given char is a chinese punctuation.
-FRISO_API int gbk_cn_punctuation(char *);
-
-//cause the logic handle is the same as the utf8.
-//    here invoke the utf8 interface directly.
-//FRISO_API int gbk_keep_punctuation( char * );
-//@Deprecated
-//#define gbk_keep_punctuation( str ) utf8_keep_punctuation(str)
-
-//check if the given english char is a full-width char or not.
-//FRISO_API int gbk_fullwidth_char( char * ) ;
-/* }}}*/
-
-#endif    /*end _friso_charset_h*/
--- a/libfriso/friso/src/friso_hash.c
+++ b/libfriso/friso/src/friso_hash.c
@ -1,285 +0,0 @@
-/*
- * friso hash table functions implementation defined in header file "friso_API.h".
-
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdlib.h>
-#include <string.h>
-
-//-166411799L
-//31 131 1331 13331 133331 ..
-//31 131 1313 13131 131313 ..    the best
-#define HASH_FACTOR 1313131
-
-/* ************************
- *  mapping function area *
- **************************/
-__STATIC_API__ uint_t hash(fstring str, uint_t length) {
-    //hash code
-    uint_t h = 0;
-
-    while(*str != '\0') {
-        h = h * HASH_FACTOR + (*str++);
-    }
-
-    return (h % length);
-}
-
-/*test if a integer is a prime.*/
-__STATIC_API__ int is_prime(int n) {
-    int j;
-    if(n == 2 || n == 3) {
-        return 1;
-    }
-
-    if(n == 1 || n % 2 == 0) {
-        return 0;
-    }
-
-    for(j = 3; j * j < n; j++) {
-        if(n % j == 0) {
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
-/*get the next prime just after the speicified integer.*/
-__STATIC_API__ int next_prime(int n) {
-    if(n % 2 == 0) n++;
-    for(; ! is_prime(n); n = n + 2) ;
-
-    return n;
-}
-
-//fstring copy, return the pointer of the new string.
-//static fstring string_copy( fstring _src ) {
-//int bytes = strlen( _src );
-//fstring _dst = ( fstring ) FRISO_MALLOC( bytes + 1 );
-//register int t = 0;
-
-//do {
-//_dst[t] = _src[t];
-//t++;
-//} while ( _src[t] != '\0' );
-//_dst[t] = '\0';
-
-//return _dst;
-//}
-
-/* *********************************
- * static hashtable function area. *
- ***********************************/
-__STATIC_API__ hash_entry_t new_hash_entry(
-    fstring key,
-    void * value,
-    hash_entry_t next) {
-    hash_entry_t e = (hash_entry_t)
-                     FRISO_MALLOC(sizeof(friso_hash_entry));
-    if(e == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //e->_key = string_copy( key );
-    e->_key  = key;
-    e->_val  = value;
-    e->_next = next;
-
-    return e;
-}
-
-//create blocks copy of entries.
-__STATIC_API__ hash_entry_t * create_hash_entries(uint_t blocks) {
-    register uint_t t;
-    hash_entry_t *e = (hash_entry_t *)
-                      FRISO_CALLOC(sizeof(hash_entry_t), blocks);
-    if(e == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    for(t = 0; t < blocks; t++) {
-        e[t] = NULL;
-    }
-
-    return e;
-}
-
-//a static function to do the re-hash work.
-__STATIC_API__ void rebuild_hash(friso_hash_t _hash) {
-    //printf("rehashed.\n");
-    //find the next prime as the length of the hashtable.
-    uint_t t, length = next_prime(_hash->length * 2 + 1);
-    hash_entry_t e, next, *_src = _hash->table, \
-                                  *table = create_hash_entries(length);
-    uint_t bucket;
-
-    //copy the nodes
-    for(t = 0; t < _hash->length; t++) {
-        e = *(_src + t);
-        if(e != NULL) {
-            do {
-                next = e->_next;
-                bucket = hash(e->_key, length);
-                e->_next = table[bucket];
-                table[bucket] = e;
-                e = next;
-            } while(e != NULL);
-        }
-    }
-
-    _hash->table  = table;
-    _hash->length = length;
-    _hash->threshold = (uint_t)(_hash->length * _hash->factor);
-
-    //free the old hash_entry_t blocks allocations.
-    FRISO_FREE(_src);
-}
-
-/* ********************************
- * hashtable interface functions. *
- * ********************************/
-
-//create a new hash table.
-FRISO_API friso_hash_t new_hash_table(void) {
-    friso_hash_t  _hash = (friso_hash_t) FRISO_MALLOC(sizeof(friso_hash_cdt));
-    if(_hash == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //initialize the the hashtable
-    _hash->length    = DEFAULT_LENGTH;
-    _hash->size      = 0;
-    _hash->factor    = DEFAULT_FACTOR;
-    _hash->threshold = (uint_t)(_hash->length * _hash->factor);
-    _hash->table     = create_hash_entries(_hash->length);
-
-    return _hash;
-}
-
-FRISO_API void free_hash_table(
-    friso_hash_t _hash,
-    fhash_callback_fn_t fentry_func) {
-    register uint_t j;
-    hash_entry_t e, n;
-
-    for(j = 0; j < _hash->length; j++) {
-        e = *(_hash->table + j);
-        for(; e != NULL ;) {
-            n = e->_next;
-            if(fentry_func != NULL) fentry_func(e);
-            FRISO_FREE(e);
-            e = n;
-        }
-    }
-
-    //free the pointer array block ( 4 * htable->length continuous bytes ).
-    FRISO_FREE(_hash->table);
-    FRISO_FREE(_hash);
-}
-
-
-//put a new mapping insite.
-//the value cannot be NULL.
-FRISO_API void *hash_put_mapping(
-    friso_hash_t _hash,
-    fstring key,
-    void * value) {
-    uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
-    hash_entry_t e = *(_hash->table + bucket);
-    void *oval = NULL;
-
-    //check the given key is already exists or not.
-    for(; e != NULL; e = e->_next) {
-        if(key == e->_key
-                || (key != NULL && e->_key != NULL
-                    && strcmp(key, e->_key) == 0)) {
-            oval = e->_val;     //bak the old value
-            e->_key = key;
-            e->_val = value;
-            return oval;
-        }
-    }
-
-    //put a new mapping into the hashtable.
-    _hash->table[bucket] = new_hash_entry(key, value, _hash->table[bucket]);
-    _hash->size++;
-
-    //check the condition to rebuild the hashtable.
-    if(_hash->size >= _hash->threshold) {
-        rebuild_hash(_hash);
-    }
-
-    return oval;
-}
-
-//check the existence of the mapping associated with the given key.
-FRISO_API int hash_exist_mapping(
-    friso_hash_t _hash, fstring key) {
-    uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
-    hash_entry_t e;
-
-    for(e = *(_hash->table + bucket);
-            e != NULL; e = e->_next) {
-        if(key == e->_key
-                || (key != NULL && e->_key != NULL
-                    && strcmp(key, e->_key) == 0)) {
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-//get the value associated with the given key.
-FRISO_API void *hash_get_value(friso_hash_t _hash, fstring key) {
-    uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
-    hash_entry_t e;
-
-    for(e = *(_hash->table + bucket);
-            e != NULL; e = e->_next) {
-        if(key == e->_key
-                || (key != NULL && e->_key != NULL
-                    && strcmp(key, e->_key) == 0)) {
-            return e->_val;
-        }
-    }
-
-    return NULL;
-}
-
-//remove the mapping associated with the given key.
-FRISO_API hash_entry_t hash_remove_mapping(
-    friso_hash_t _hash, fstring key) {
-    uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
-    hash_entry_t e, prev = NULL;
-    hash_entry_t b;
-
-    for(e = *(_hash->table + bucket);
-            e != NULL; prev = e, e = e->_next) {
-        if(key == e->_key
-                || (key != NULL && e->_key != NULL
-                    && strcmp(key, e->_key) == 0)) {
-            b = e;
-            //the node located at *( htable->table + bucket )
-            if(prev == NULL) {
-                _hash->table[bucket] = e->_next;
-            } else {
-                prev->_next = e->_next;
-            }
-            //printf("%s was removed\n", b->_key);
-            _hash->size--;
-            //FRISO_FREE( b );
-            return b;
-        }
-    }
-
-    return NULL;
-}
-
-//count the size.(A macro define has replace this.)
-//FRISO_API uint_t hash_get_size( friso_hash_t _hash ) {
-//    return _hash->size;
-//}
--- a/libfriso/friso/src/friso_lexicon.c
+++ b/libfriso/friso/src/friso_lexicon.c
@ -1,540 +0,0 @@
-/*
- * friso lexicon functions implementation.
- * used to deal with the friso lexicon, like: load,remove,match...
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include "friso_API.h"
-#include "friso.h"
-
-#define __SPLIT_MAX_TOKENS__     5
-#define __LEX_FILE_DELIME__     '#'
-#define __FRISO_LEX_IFILE__    "friso.lex.ini"
-
-//create a new lexicon
-FRISO_API friso_dic_t friso_dic_new() {
-    register uint_t t;
-    friso_dic_t dic = (friso_dic_t) FRISO_CALLOC(
-                          sizeof(friso_hash_t), __FRISO_LEXICON_LENGTH__);
-    if(dic == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
-        dic[t] = new_hash_table();
-    }
-
-    return dic;
-}
-
-/**
- * default callback function to invoke
- *     when free the friso dictionary .
- *
- * @date 2013-06-12
- */
-__STATIC_API__ void default_fdic_callback(hash_entry_t e) {
-    register uint_t i;
-    friso_array_t syn;
-    lex_entry_t lex = (lex_entry_t) e->_val;
-    //free the lex->word
-    FRISO_FREE(lex->word);
-    //free the lex->syn if it is not NULL
-    if(lex->syn != NULL) {
-        syn = lex->syn;
-        for(i = 0; i < syn->length; i++) {
-            FRISO_FREE(syn->items[i]);
-        }
-        free_array_list(syn);
-    }
-
-    //free the e->_val
-    //@date 2014-01-28 posted by mlemay@gmail.com
-    FRISO_FREE(lex);
-}
-
-FRISO_API void friso_dic_free(friso_dic_t dic) {
-    register uint_t t;
-    for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
-        //free the hash table
-        free_hash_table(dic[t], default_fdic_callback);
-    }
-
-    FRISO_FREE(dic);
-}
-
-
-//create a new lexicon entry
-FRISO_API lex_entry_t new_lex_entry(
-    fstring word,
-    friso_array_t syn,
-    uint_t fre,
-    uint_t length,
-    uint_t type) {
-    lex_entry_t e = (lex_entry_t)
-                    FRISO_MALLOC(sizeof(lex_entry_cdt));
-    if(e == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //initialize.
-    e->word   = word;
-    e->syn    = syn;            //synoyum words array list.
-    e->pos    = NULL;            //part of speech array list.
-    //e->py    = NULL; //set to NULL first.
-    e->fre    = fre;
-    e->length = (uchar_t) length;    //length
-    e->rlen   = (uchar_t) length;    //set to length by default.
-    e->type   = (uchar_t) type;    //type
-    e->ctrlMask = 0;            //control mask.
-    e->offset = -1;
-
-    return e;
-}
-
-/**
- * free the given lexicon entry.
- * you have to do three thing maybe:
- * 1. free where its syn items points to. (not implemented)
- * 2. free its syn. (friso_array_t)
- * 3. free its pos. (friso_array_t)
- * 4. free the lex_entry_t.
- */
-FRISO_API void free_lex_entry_full(lex_entry_t e) {
-    register uint_t i;
-    friso_array_t syn;
-
-    //free the lex->word
-    FRISO_FREE(e->word);
-    //free the lex->syn if it is not NULL
-    if(e->syn != NULL) {
-        syn = e->syn;
-        for(i = 0; i < syn->length; i++) {
-            FRISO_FREE(syn->items[i]);
-        }
-        free_array_list(syn);
-    }
-
-    //free the e->_val
-    //@date 2014-01-28 posted by mlemay@gmail.com
-    FRISO_FREE(e);
-}
-
-FRISO_API void free_lex_entry(lex_entry_t e) {
-    //if ( e->syn != NULL ) {
-    //    if ( flag == 1 ) free_array_list( e->syn);
-    //    else free_array_list( e->syn );
-    //}
-
-    FRISO_FREE(e);
-}
-
-
-//add a new entry to the dictionary.
-FRISO_API void friso_dic_add(
-    friso_dic_t dic,
-    friso_lex_t lex,
-    fstring word,
-    friso_array_t syn) {
-    void *olex = NULL;
-    if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
-        //printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
-        olex = hash_put_mapping(dic[lex], word,
-                                new_lex_entry(word, syn, 0,
-                                              (uint_t) strlen(word), (uint_t) lex));
-        if(olex != NULL) {
-            free_lex_entry_full((lex_entry_t)olex);
-        }
-    }
-}
-
-FRISO_API void friso_dic_add_with_fre(
-    friso_dic_t dic,
-    friso_lex_t lex,
-    fstring word,
-    friso_array_t syn,
-    uint_t frequency) {
-    void *olex = NULL;
-    if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
-        olex = hash_put_mapping(dic[lex], word,
-                                new_lex_entry(word, syn, frequency,
-                                              (uint_t) strlen(word), (uint_t) lex));
-        if(olex != NULL) {
-            free_lex_entry_full((lex_entry_t)olex);
-        }
-    }
-}
-
-/*
- * read a line from a specified stream.
- *         the newline will be cleared.
- *
- * @date    2012-11-24
- */
-FRISO_API fstring file_get_line(fstring __dst, FILE * _stream) {
-    register int c;
-    fstring cs;
-
-    cs = __dst;
-    while((c = fgetc(_stream)) != EOF) {
-        if(c == '\n') break;
-        *cs++ = c;
-    }
-    *cs = '\0';
-
-    return (c == EOF && cs == __dst) ? NULL : __dst;
-}
-
-/*
- * static function to copy a string.
- */
-///instead of memcpy
-__STATIC_API__ fstring string_copy(
-    fstring _src,
-    fstring __dst,
-    uint_t blocks) {
-
-    register fstring __src = _src;
-    register uint_t t;
-
-    for(t = 0; t < blocks; t++) {
-        if(*__src == '\0') break;
-        __dst[t] = *__src++;
-    }
-    __dst[t] = '\0';
-
-    return __dst;
-}
-
-/**
- * make a heap allocation, and copy the
- *     source fstring to the new allocation, and
- *     you should free it after use it .
- *
- * @param _src      source fstring
- * @param blocks    number of bytes to copy
- */
-__STATIC_API__ fstring string_copy_heap(
-    fstring _src, uint_t blocks) {
-    register uint_t t;
-
-    fstring str = (fstring) FRISO_MALLOC(blocks + 1);
-    if(str == NULL) {
-        ___ALLOCATION_ERROR___;
-    }
-
-    for(t = 0; t < blocks; t++) {
-        //if ( *_src == '\0' ) break;
-        str[t] = *_src++;
-    }
-
-    str[t] = '\0';
-    return str;
-}
-
-/*
- * find the postion of the first appear of the given char.
- *    address of the char in the fstring will be return .
- *    if not found NULL will be return .
- */
-__STATIC_API__ fstring indexOf(fstring __str, char delimiter) {
-    uint_t i, __length__;
-
-    __length__ = strlen(__str);
-    for(i = 0; i < __length__; i++) {
-        if(__str[i] == delimiter) {
-            return __str + i;
-        }
-    }
-
-    return NULL;
-}
-
-/**
- * load all the valid wors from a specified lexicon file .
- *
- * @param dic        friso dictionary instance (A hash array)
- * @param lex        the lexicon type
- * @param lex_file    the path of the lexicon file
- * @param length    the maximum length of the word item
- */
-FRISO_API void friso_dic_load(
-    friso_t friso,
-    friso_config_t config,
-    friso_lex_t lex,
-    fstring lex_file,
-    uint_t length) {
-
-    FILE * _stream;
-    char __char[1024], _buffer[512];
-    fstring _line;
-    string_split_entry sse;
-
-    fstring _word;
-    char _sbuffer[512];
-    fstring _syn;
-    friso_array_t sywords;
-    uint_t _fre;
-
-    if((_stream = fopen(lex_file, "rb")) != NULL) {
-        while((_line = file_get_line(__char, _stream)) != NULL) {
-            //clear up the notes
-            //make sure the length of the line is greater than 1.
-            //like the single '#' mark in stopwords dictionary.
-            if(_line[0] == '#' && strlen(_line) > 1) continue;
-
-            //handle the stopwords.
-            if(lex == __LEX_STOPWORDS__) {
-                //clean the chinese words that its length is greater than max length.
-                if(((int)_line[0]) < 0 && strlen(_line) > length) continue;
-                friso_dic_add(friso->dic, __LEX_STOPWORDS__,
-                              string_copy_heap(_line, strlen(_line)), NULL);
-                continue;
-            }
-
-            //split the fstring with '/'.
-            string_split_reset(&sse, "/", _line);
-            if(string_split_next(&sse, _buffer) == NULL) {
-                continue;
-            }
-
-            //1. get the word.
-            _word = string_copy_heap(_buffer, strlen(_buffer));
-
-            if(string_split_next(&sse, _buffer) == NULL) {
-                //normal lexicon type,
-                //add them to the dictionary directly
-                friso_dic_add(friso->dic, lex, _word, NULL);
-                continue;
-            }
-
-            /*
-             * filter out the words that its length is larger
-             *     than the specified limit.
-             * but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
-             *     and __LEX_CEM_WORDS__.
-             */
-            if(!(lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__)
-                    && strlen(_word) > length) {
-                FRISO_FREE(_word);
-                continue;
-            }
-
-            //2. get the synonyms words.
-            _syn = NULL;
-            if(strcmp(_buffer, "null") != 0) {
-                _syn = string_copy(_buffer, _sbuffer, strlen(_buffer));
-            }
-
-            //3. get the word frequency if it available.
-            _fre = 0;
-            if(string_split_next(&sse, _buffer) != NULL) {
-                _fre = atoi(_buffer);
-            }
-
-            /**
-             * Here:
-             * split the synonyms words with mark ","
-             *     and put them in a array list if the synonyms is not NULL
-             */
-            sywords = NULL;
-            if(config->add_syn && _syn != NULL) {
-                string_split_reset(&sse, ",", _sbuffer);
-                sywords = new_array_list_with_opacity(5);
-                while(string_split_next(&sse, _buffer) != NULL) {
-                    if(strlen(_buffer) > length) continue;
-                    array_list_add(sywords,
-                                   string_copy_heap(_buffer, strlen(_buffer)));
-                }
-                sywords = array_list_trim(sywords);
-            }
-
-            //4. add the word item
-            friso_dic_add_with_fre(
-                friso->dic, lex, _word, sywords, _fre);
-        }
-
-        fclose(_stream);
-    } else {
-        fprintf(stderr, "Warning: Fail to open lexicon file %s\n", lex_file);
-        fprintf(stderr, "Warning: Without lexicon file, segment results will not correct \n");
-    }
-}
-
-
-/**
- * get the lexicon type index with the specified
- *     type keywords .
- *
- * @see        friso.h#friso_lex_t
- * @param     _key
- * @return     int
- */
-__STATIC_API__ friso_lex_t get_lexicon_type_with_constant(fstring _key) {
-    if(strcmp(_key, "__LEX_CJK_WORDS__") == 0) {
-        return __LEX_CJK_WORDS__;
-    } else if(strcmp(_key, "__LEX_CJK_UNITS__") == 0) {
-        return __LEX_CJK_UNITS__;
-    } else if(strcmp(_key, "__LEX_ECM_WORDS__") == 0) {
-        return __LEX_ECM_WORDS__;
-    } else if(strcmp(_key, "__LEX_CEM_WORDS__") == 0) {
-        return __LEX_CEM_WORDS__;
-    } else if(strcmp(_key, "__LEX_CN_LNAME__") == 0) {
-        return __LEX_CN_LNAME__;
-    } else if(strcmp(_key, "__LEX_CN_SNAME__") == 0) {
-        return __LEX_CN_SNAME__;
-    } else if(strcmp(_key, "__LEX_CN_DNAME1__") == 0) {
-        return __LEX_CN_DNAME1__;
-    } else if(strcmp(_key, "__LEX_CN_DNAME2__") == 0) {
-        return __LEX_CN_DNAME2__;
-    } else if(strcmp(_key, "__LEX_CN_LNA__") == 0) {
-        return __LEX_CN_LNA__;
-    } else if(strcmp(_key, "__LEX_STOPWORDS__") == 0) {
-        return __LEX_STOPWORDS__;
-    } else if(strcmp(_key, "__LEX_ENPUN_WORDS__") == 0) {
-        return __LEX_ENPUN_WORDS__;
-    } else if(strcmp(_key, "__LEX_EN_WORDS__") == 0) {
-        return __LEX_EN_WORDS__;
-    }
-
-    return -1;
-}
-
-/*
- * load the lexicon configuration file.
- *        and load all the valid lexicon from the configuration file.
- *
- * @param friso     friso instance
- * @param config    friso_config instance
- * @param _path     dictionary directory
- * @param _limitts  words length limit
- */
-FRISO_API void friso_dic_load_from_ifile(
-    friso_t friso,
-    friso_config_t config,
-    fstring _path,
-    uint_t _limits) {
-
-    //1.parse the configuration file.
-    FILE *__stream;
-    char __chars__[1024], __key__[30], *__line__;
-    uint_t __length__, i, t;
-    friso_lex_t lex_t;
-    string_buffer_t sb;
-
-    //get the lexicon configruation file path
-    sb = new_string_buffer();
-
-    string_buffer_append(sb, _path);
-    string_buffer_append(sb, __FRISO_LEX_IFILE__);
-    //printf("%s\n", sb->buffer);
-
-    if((__stream = fopen(sb->buffer, "rb")) != NULL) {
-        while((__line__ =
-                    file_get_line(__chars__, __stream)) != NULL) {
-            //comment filter.
-            if(__line__[0] == '#')  continue;
-            if(__line__[0] == '\0') continue;
-
-            __length__ = strlen(__line__);
-            //item start
-            if(__line__[ __length__ - 1 ] == '[') {
-                //get the type key
-                for(i = 0; i < __length__
-                        && (__line__[i] == ' ' || __line__[i] == '\t'); i++);
-                for(t = 0; i < __length__; i++, t++) {
-                    if(__line__[i] == ' '
-                            || __line__[i] == '\t' || __line__[i] == ':') break;
-                    __key__[t] = __line__[i];
-                }
-                __key__[t] = '\0';
-
-                //get the lexicon type
-                lex_t = get_lexicon_type_with_constant(__key__);
-                if(lex_t == -1) continue;
-
-                //printf("key=%s, type=%d\n", __key__, lex_t );
-                while((__line__ = file_get_line(__chars__, __stream)) != NULL) {
-                    //comments filter.
-                    if(__line__[0] == '#') continue;
-                    if(__line__[0] == '\0') continue;
-
-                    __length__ = strlen(__line__);
-                    if(__line__[ __length__ - 1 ] == ']') break;
-
-                    for(i = 0; i < __length__
-                            && (__line__[i] == ' ' || __line__[i] == '\t'); i++);
-                    for(t = 0; i < __length__; i++, t++) {
-                        if(__line__[i] == ' '
-                                || __line__[i] == '\t' || __line__[i] == ';') break;
-                        __key__[t] = __line__[i];
-                    }
-                    __key__[t] = '\0';
-
-                    //load the lexicon item from the lexicon file.
-                    string_buffer_clear(sb);
-                    string_buffer_append(sb, _path);
-                    string_buffer_append(sb, __key__);
-                    //printf("key=%s, type=%d\n", __key__, lex_t);
-                    friso_dic_load(friso, config, lex_t, sb->buffer, _limits);
-                }
-
-            }
-
-        } //end while
-
-        fclose(__stream);
-    } else {
-        fprintf(stderr, "Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
-        fprintf(stderr, "Warning: Without lexicon file, segment results will not correct \n");
-    }
-
-    free_string_buffer(sb);
-}
-
-//match the item.
-FRISO_API int friso_dic_match(
-    friso_dic_t dic,
-    friso_lex_t lex,
-    fstring word) {
-    if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
-        return hash_exist_mapping(dic[lex], word);
-    }
-    return 0;
-}
-
-//get the lex_entry_t associated with the word.
-FRISO_API lex_entry_t friso_dic_get(
-    friso_dic_t dic,
-    friso_lex_t lex,
-    fstring word) {
-    if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
-        return (lex_entry_t) hash_get_value(dic[lex], word);
-    }
-    return NULL;
-}
-
-//get the size of the specified type dictionary.
-FRISO_API uint_t friso_spec_dic_size(
-    friso_dic_t dic,
-    friso_lex_t lex) {
-    if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
-        return hash_get_size(dic[lex]);
-    }
-    return 0;
-}
-
-//get size of the whole dictionary.
-FRISO_API uint_t friso_all_dic_size(
-    friso_dic_t dic) {
-    register uint_t size = 0, t;
-
-    for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
-        size += hash_get_size(dic[t]);
-    }
-
-    return size;
-}
--- a/libfriso/friso/src/friso_link.c
+++ b/libfriso/friso/src/friso_link.c
@ -1,266 +0,0 @@
-/*
- * link list function implementations declared in the header file "friso_API.h".
- * when a link_node is deleted, only the allocation of the node itself
- * is freed, not the allocation of its value.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdlib.h>
-
-/*
- * create a new link list node.
- *
- * @param value  the payload pointer stored in the node (not copied).
- * @param prev   the node to store as the predecessor, may be NULL.
- * @param next   the node to store as the successor, may be NULL.
- * @return the new node; ___ALLOCATION_ERROR___ is invoked when
- *         the allocation fails.
- */
-__STATIC_API__ link_node_t new_node_entry(
-    void * value,
-    link_node_t prev,
-    link_node_t next) {
-    link_node_t entry = (link_node_t) FRISO_MALLOC(sizeof(link_node_entry));
-    if(entry == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    entry->prev  = prev;
-    entry->next  = next;
-    entry->value = value;
-
-    return entry;
-}
-
-/*
- * create a new, empty link list.
- * the list owns two placeholder nodes (head and tail, both with a NULL
- *     value) that are linked to each other; its size starts at 0.
- *
- * @return the new list; ___ALLOCATION_ERROR___ is invoked when
- *         the allocation fails.
- */
-FRISO_API friso_link_t new_link_list(void) {
-    friso_link_t list = (friso_link_t) FRISO_MALLOC(sizeof(friso_link_entry));
-    if(list == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    list->size = 0;
-    //the head must exist first: the tail is created pointing back at it.
-    list->head = new_node_entry(NULL, NULL, NULL);
-    list->tail = new_node_entry(NULL, list->head, NULL);
-    list->head->next = list->tail;
-
-    return list;
-}
-
-/*
- * free the given link list: every node from the head up to the first
- *     NULL next pointer, then the list entry itself.
- * the values stored in the nodes are not freed.
- *
- * @param link  the list to free.
- */
-FRISO_API void free_link_list(friso_link_t link) {
-    link_node_t cur = link->head;
-    link_node_t following;
-
-    while(cur != NULL) {
-        //remember the successor before the node is released.
-        following = cur->next;
-        FRISO_FREE(cur);
-        cur = following;
-    }
-
-    FRISO_FREE(link);
-}
-
-/*
- * clear the link list: free every node between the head and the tail,
- *     re-link the head and the tail to each other and reset the size to 0.
- * the values stored in the freed nodes are not freed.
- *
- * @param link  the list to clear.
- * @return the same list.
- */
-FRISO_API friso_link_t link_list_clear(
-    friso_link_t link) {
-    link_node_t cur = link->head->next;
-    link_node_t following;
-
-    //release the middle nodes only; head and tail are kept.
-    while(cur != link->tail) {
-        following = cur->next;
-        FRISO_FREE(cur);
-        cur = following;
-    }
-
-    link->size = 0;
-    link->tail->prev = link->head;
-    link->head->next = link->tail;
-
-    return link;
-}
-
-//get the size of the link list.
-//FRISO_API uint_t link_list_size( friso_link_t link ) {
-//    return link->size;
-//}
-
-//check if the link list is empty
-//FRISO_API int link_list_empty( friso_link_t link ) {
-//    return ( link->size == 0 );
-//}
-
-
-/*
- * find the node at the specified position (0-based, counted from the
- *     first node after the head).
- * the walk starts from whichever end is closer to idx.
- * static
- *
- * @param link  the list to search.
- * @param idx   the position of the wanted node.
- * @return the node, or NULL when idx is not below link->size.
- */
-__STATIC_API__ link_node_t get_node(
-    friso_link_t link, uint_t idx) {
-    link_node_t cursor = NULL;
-    register uint_t hops;
-
-    if(idx < 0 || idx >= link->size) {
-        return NULL;
-    }
-
-    if(idx < link->size / 2) {
-        //front half: idx + 1 steps forward from the head.
-        cursor = link->head;
-        for(hops = idx + 1; hops > 0; hops--)
-            cursor = cursor->next;
-    } else {
-        //back half: size - idx steps backward from the tail.
-        cursor = link->tail;
-        for(hops = link->size - idx; hops > 0; hops--)
-            cursor = cursor->prev;
-    }
-
-    return cursor;
-}
-
-/*
- * insert a new node holding `value` just before the given node and
- *     increase the size of the list by one.
- * static
- *
- * implemented as a macro; the function form it replaced is kept below
- *     for reference. the body is wrapped in do { ... } while(0) so that
- *     "insert_before(...);" is a single statement (safe under if/else),
- *     every argument is parenthesised, and `node` is evaluated only once.
- *
- * @param link   the list that owns `node`.
- * @param node   the node to insert before; must have a valid prev node.
- * @param value  the payload pointer of the new node.
- */
-//__STATIC_API__ void insert_before(
-//    friso_link_t link,
-//    link_node_t node,
-//    void * value )
-//{
-//    link_node_t e = new_node_entry( value, node->prev, node );
-//    e->prev->next = e;
-//    e->next->prev = e;
-//    //node->prev = e;
-//
-//    link->size++;
-//}
-#define insert_before( link, node, value ) \
-do { \
-    link_node_t ib_at_ = (node);                                      \
-    link_node_t ib_new_ = new_node_entry( (value), ib_at_->prev, ib_at_ ); \
-    ib_new_->prev->next = ib_new_;                        \
-    ib_new_->next->prev = ib_new_;                        \
-    (link)->size++;                        \
-} while(0)
-
-/*
- * static function:
- * unlink the given node from the list, free the node and decrease the
- *     size of the list by one.
- * the value of the node is not freed; it is handed back to the caller,
- *     who frees it when necessary.
- *
- * @param link  the list that owns the node.
- * @param node  the node to remove; its prev and next must be valid nodes.
- * @return the value of the removed node.
- */
-__STATIC_API__ void * remove_node(
-    friso_link_t link, link_node_t node) {
-    void * payload = node->value;
-    link_node_t before = node->prev;
-    link_node_t after  = node->next;
-
-    //bridge the neighbours over the node, then release it.
-    before->next = after;
-    after->prev  = before;
-    FRISO_FREE(node);
-
-    link->size--;
-
-    return payload;
-}
-
-
-//add a new node holding `value` to the end of the link list
-//(it is inserted just before the tail node); the value is not copied.
-FRISO_API void link_list_add(
-    friso_link_t link, void * value) {
-    insert_before(link, link->tail, value);
-}
-
-/*
- * add a new node holding `value` before the node at the given index.
- * nothing is inserted when get_node() finds no node at idx.
- *
- * @param link   the list to modify.
- * @param idx    the position of the node to insert before.
- * @param value  the payload pointer of the new node (not copied).
- */
-FRISO_API void link_list_insert_before(
-    friso_link_t link, uint_t idx, void * value) {
-    link_node_t target = get_node(link, idx);
-    if(target == NULL) {
-        return;
-    }
-
-    insert_before(link, target, value);
-}
-
-/*
- * get the value of the node at the specified position.
- *
- * @param link  the list to read.
- * @param idx   the position of the node.
- * @return the value of the node, or NULL when there is no node at idx.
- */
-FRISO_API void * link_list_get(
-    friso_link_t link, uint_t idx) {
-    link_node_t found = get_node(link, idx);
-    return (found == NULL) ? NULL : found->value;
-}
-
-/*
- * set the value of the node located at the specified position.
- * the old value is not freed; it is returned to the caller,
- *     who frees it when necessary.
- *
- * @param link   the list to modify.
- * @param idx    the position of the node.
- * @param value  the new payload pointer.
- * @return the old value, or NULL when there is no node at idx
- *         (in which case nothing is changed).
- */
-FRISO_API void *link_list_set(
-    friso_link_t link,
-    uint_t idx, void * value) {
-    link_node_t target = get_node(link, idx);
-    void * previous;
-
-    if(target == NULL) {
-        return NULL;
-    }
-
-    previous = target->value;
-    target->value = value;
-
-    return previous;
-}
-
-/*
- * remove the node located at the specified position.
- *
- * @see remove_node
- * @param link  the list to modify.
- * @param idx   the position of the node to remove.
- * @return the value of the removed node, or NULL when there is
- *         no node at idx.
- */
-FRISO_API void *link_list_remove(
-    friso_link_t link, uint_t idx) {
-    link_node_t target = get_node(link, idx);
-
-    return (target == NULL) ? NULL : remove_node(link, target);
-}
-
-/*
- * remove the given node from the given link list.
- * the node is passed straight to remove_node() without any check.
- *
- * @see remove_node.
- * @param link  the list that owns the node.
- * @param node  the node to remove.
- * @return the value of the node removed.
- */
-FRISO_API void *link_list_remove_node(
-    friso_link_t link,
-    link_node_t node) {
-    return remove_node(link, node);
-}
-
-/*
- * remove the first node (the one just after the head).
- *
- * @param link  the list to modify.
- * @return the value of the removed node, or NULL when the list is empty.
- */
-FRISO_API void *link_list_remove_first(
-    friso_link_t link) {
-    if(link->size <= 0) {
-        return NULL;
-    }
-    return remove_node(link, link->head->next);
-}
-
-/*
- * remove the last node (the one just before the tail).
- *
- * @param link  the list to modify.
- * @return the value of the removed node, or NULL when the list is empty.
- */
-FRISO_API void *link_list_remove_last(
-    friso_link_t link) {
-    if(link->size <= 0) {
-        return NULL;
-    }
-    return remove_node(link, link->tail->prev);
-}
-
-//append a node holding `value` at the end of the list
-//(it is inserted just before the tail node); the value is not copied.
-FRISO_API void link_list_add_last(
-    friso_link_t link,
-    void *value) {
-    insert_before(link, link->tail, value);
-}
-
-//add a node holding `value` at the front of the list
-//(it is inserted just after the head node); the value is not copied.
-FRISO_API void link_list_add_first(
-    friso_link_t link, void *value) {
-    insert_before(link, link->head->next, value);
-}
--- a/libfriso/friso/src/friso_string.c
+++ b/libfriso/friso/src/friso_string.c
@ -1,298 +0,0 @@
-/*
- * utf-8 handle functions implementation.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* ******************************************
- * fstring buffer functions implements.        *
- ********************************************/
-/**
- * create a new zero-filled buffer.
- * @Note:
- * 1. its real size is 1 byte greater than the specified value,
- *     so `length` bytes of content always leave room for a '\0'.
- * 2. no pooling or other optimization is done for the allocation.
- *
- * @param  length  the number of usable bytes.
- * @return the buffer; ___ALLOCATION_ERROR___ is invoked when
- *         the allocation fails.
- * @date: 2014-10-16
- */
-__STATIC_API__ fstring create_buffer(uint_t length) {
-    const uint_t bytes = length + 1;
-    fstring fresh = (fstring) FRISO_MALLOC(bytes);
-
-    if(fresh == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    memset(fresh, 0x00, bytes);
-
-    return fresh;
-}
-
-/*
- * replace the storage of the string buffer with a new one of
- *     __allocs usable bytes, keeping the first sb->length bytes.
- * __allocs must not be smaller than sb->length: the copy is not
- *     bounded by the new size.
- *
- * @param sb        the string buffer to resize.
- * @param __allocs  the new number of usable bytes.
- * @return the same string buffer.
- */
-__STATIC_API__ string_buffer_t resize_buffer(
-    string_buffer_t sb, uint_t __allocs) {
-    fstring grown = create_buffer(__allocs);
-
-    //carry the content over, then drop the old storage.
-    memcpy(grown, sb->buffer, sb->length);
-    FRISO_FREE(sb->buffer);
-
-    sb->allocs = __allocs;
-    sb->buffer = grown;
-
-    return sb;
-}
-
-//create a new fstring buffer with a default opacity.
-//FRISO_API string_buffer_t new_string_buffer( void )
-//{
-//    return new_string_buffer_with_opacity( __BUFFER_DEFAULT_LENGTH__ );
-//}
-
-/*
- * create a new, empty fstring buffer with the given capacity.
- *
- * @param opacity  the number of usable bytes to allocate up front.
- * @return the new string buffer; ___ALLOCATION_ERROR___ is invoked
- *         when the allocation fails.
- */
-FRISO_API string_buffer_t new_string_buffer_with_opacity(uint_t opacity) {
-    string_buffer_t fresh = (string_buffer_t)
-                            FRISO_MALLOC(sizeof(string_buffer_entry));
-    if(fresh == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    fresh->length = 0;
-    fresh->allocs = opacity;
-    fresh->buffer = create_buffer(opacity);
-
-    return fresh;
-}
-
-/*
- * create a string buffer initialized with a copy of the given string.
- * the capacity is strlen(str) + __BUFFER_DEFAULT_LENGTH__.
- *
- * @param str  the NUL-terminated string to copy.
- * @return the new string buffer; ___ALLOCATION_ERROR___ is invoked
- *         when the allocation fails.
- */
-FRISO_API string_buffer_t new_string_buffer_with_string(fstring str) {
-    string_buffer_t fresh = (string_buffer_t)
-                            FRISO_MALLOC(sizeof(string_buffer_entry));
-    if(fresh == NULL) {
-        ___ALLOCATION_ERROR___
-    }
-
-    //size the storage from the content plus the default head room.
-    fresh->length = strlen(str);
-    fresh->allocs = fresh->length + __BUFFER_DEFAULT_LENGTH__;
-    fresh->buffer = create_buffer(fresh->length + __BUFFER_DEFAULT_LENGTH__);
-
-    //create_buffer zero-fills, so the copy stays NUL-terminated.
-    memcpy(fresh->buffer, str, fresh->length);
-
-    return fresh;
-}
-
-/*
- * append the NUL-terminated string __str to the string buffer.
- * when the content would exceed the capacity, the storage is resized
- *     to twice the new content length plus one.
- *
- * @param sb     the string buffer to extend.
- * @param __str  the string to append.
- */
-FRISO_API void string_buffer_append(
-    string_buffer_t sb, fstring __str) {
-    register uint_t extra = strlen(__str);
-
-    //grow first when the new content does not fit.
-    if(sb->length + extra > sb->allocs) {
-        resize_buffer(sb, (sb->length + extra) * 2 + 1);
-    }
-
-    memcpy(sb->buffer + sb->length, __str, extra);
-    sb->length += extra;
-}
-
-/*
- * append a single byte to the string buffer.
- * when the buffer is full, the storage is resized to twice the
- *     current content length plus one.
- *
- * @param sb  the string buffer to extend.
- * @param ch  the byte to append.
- */
-FRISO_API void string_buffer_append_char(
-    string_buffer_t sb, char ch) {
-    if(sb->length + 1 > sb->allocs) {
-        resize_buffer(sb, sb->length * 2 + 1);
-    }
-
-    sb->buffer[sb->length] = ch;
-    sb->length++;
-}
-
-/*
- * NOTE: not implemented. the body is empty, so calling this function
- *     leaves the string buffer unchanged and all arguments unused.
- */
-FRISO_API void string_buffer_insert(
-    string_buffer_t sb,
-    uint_t idx,
-    fstring __str) {
-}
-
-/*
- * remove `length` bytes from the buffer, starting at byte offset `idx`.
- *        the bytes after idx + length are moved down to close the gap.
- *
- * the request is clamped to the stored content: an idx at or past
- *     sb->length removes nothing, and a length that runs past the end
- *     removes only up to the end. the vacated bytes at the end are
- *     zero-filled, so after a removal the content is followed by '\0'.
- *
- * @param sb      the string buffer to modify.
- * @param idx     the offset of the first byte to remove.
- * @param length  the number of bytes to remove.
- * @return the new string (sb->buffer, modified in place).
- */
-FRISO_API fstring string_buffer_remove(
-    string_buffer_t sb,
-    uint_t idx,
-    uint_t length) {
-    uint_t tail;
-
-    //nothing is stored at or after idx: leave the buffer untouched.
-    if(idx >= sb->length) {
-        return sb->buffer;
-    }
-
-    //never remove more than what is stored after idx.
-    if(length > sb->length - idx) {
-        length = sb->length - idx;
-    }
-
-    //source and destination overlap, so memmove (not memcpy) is required.
-    tail = sb->length - idx - length;
-    memmove(sb->buffer + idx, sb->buffer + idx + length, tail);
-
-    //wipe the stale bytes left behind the moved content.
-    memset(sb->buffer + idx + tail, 0x00, length);
-    sb->length -= length;
-
-    return sb->buffer;
-}
-
-/*
- * shrink the storage of the string buffer to its content:
- *     when length < allocs - 1 the buffer is resized to length + 1
- *     usable bytes, otherwise it is left as it is.
- *
- * @param sb  the string buffer to trim.
- * @return the same string buffer.
- */
-FRISO_API string_buffer_t string_buffer_trim(string_buffer_t sb) {
-    //already tight enough: nothing to do.
-    if(!(sb->length < sb->allocs - 1)) {
-        return sb;
-    }
-    return resize_buffer(sb, sb->length + 1);
-}
-
-/*
- * free the string_buffer entry itself and hand its character storage
- *     over to the caller.
- * the storage (string_buffer_t->buffer) is not freed here; the caller
- *     owns it from now on and frees it when necessary.
- *
- * @param sb  the string buffer to dissolve; it is invalid afterwards.
- * @return the character storage that was held by sb.
- */
-FRISO_API fstring string_buffer_devote(string_buffer_t sb) {
-    fstring content = sb->buffer;
-
-    FRISO_FREE(sb);
-
-    return content;
-}
-
-/*
- * clear the given fstring buffer.
- *        the first sb->length bytes are reset to 0 and the length is
- *        reset to 0; the capacity (sb->allocs) is kept.
- */
-FRISO_API void string_buffer_clear(string_buffer_t sb) {
-    memset(sb->buffer, 0x00, sb->length);
-    sb->length = 0;
-}
-
-//free everything of the fstring buffer:
-//the character storage first, then the entry itself.
-FRISO_API void free_string_buffer(string_buffer_t sb) {
-    FRISO_FREE(sb->buffer);
-    FRISO_FREE(sb);
-}
-
-
-/**
- * create a new string_split_entry.
- * the delimiter and the source are referenced, not copied, so both
- *     must stay valid while the entry is in use.
- *
- * @param    delimiter  the NUL-terminated delimiter.
- * @param    source     the NUL-terminated string to split.
- * @return    string_split_t positioned at the start of the source;
- *         ___ALLOCATION_ERROR___ is invoked when the allocation fails.
- */
-FRISO_API string_split_t new_string_split(
-    fstring delimiter,
-    fstring source) {
-    string_split_t splitter = (string_split_t)
-                              FRISO_MALLOC(sizeof(string_split_entry));
-    if(splitter == NULL) {
-        ___ALLOCATION_ERROR___;
-    }
-
-    splitter->idx = 0;
-
-    splitter->source = source;
-    splitter->srcLen = strlen(source);
-
-    splitter->delimiter = delimiter;
-    splitter->delLen = strlen(delimiter);
-
-    return splitter;
-}
-
-/*
- * re-target an existing splitter: set a new delimiter and a new source
- *     (both referenced, not copied) and rewind to the start.
- *
- * @param sst        the splitter to reset.
- * @param delimiter  the new NUL-terminated delimiter.
- * @param source     the new NUL-terminated source string.
- */
-FRISO_API void string_split_reset(
-    string_split_t sst,
-    fstring delimiter,
-    fstring source) {
-    sst->idx = 0;
-
-    sst->source = source;
-    sst->srcLen = strlen(source);
-
-    sst->delimiter = delimiter;
-    sst->delLen = strlen(delimiter);
-}
-
-/*
- * set a new source string (referenced, not copied) and rewind the
- *     splitter to its start; the delimiter is kept.
- *
- * @param sst     the splitter to update.
- * @param source  the new NUL-terminated source string.
- */
-FRISO_API void string_split_set_source(
-    string_split_t sst, fstring source) {
-    sst->idx = 0;
-    sst->srcLen = strlen(source);
-    sst->source = source;
-}
-
-/*
- * set a new delimiter (referenced, not copied) and rewind the
- *     splitter to the start of its source; the source is kept.
- *
- * @param sst        the splitter to update.
- * @param delimiter  the new NUL-terminated delimiter.
- */
-FRISO_API void string_split_set_delimiter(
-    string_split_t sst, fstring delimiter) {
-    sst->idx = 0;
-    sst->delLen = strlen(delimiter);
-    sst->delimiter = delimiter;
-}
-
-//free the splitter entry; the delimiter and the source
-//it points to are not freed.
-FRISO_API void free_string_split(string_split_t sst) {
-    FRISO_FREE(sst);
-}
-
-/**
- * get the next split fstring, and copy the
- *     split fstring into the __dst buffer.
- *
- * a delimiter is recognised only when all of its bytes are present in
- *     the remaining source; a partial delimiter at the very end of the
- *     source is copied as ordinary content. an empty delimiter never
- *     matches, so the rest of the source is returned as one piece.
- *
- * @Note: __dst is not bounds-checked; it must be large enough for the
- *     longest piece plus the terminating '\0'.
- *
- * @param    sst    the splitter; its position is advanced past the
- *         returned piece and the delimiter that ended it.
- * @param    __dst  the buffer that receives the NUL-terminated piece.
- * @return    NULL if the end of the source has been reached, otherwise
- *         a pointer to the '\0' written at the end of the piece
- *         inside __dst (not to the start of __dst).
- */
-FRISO_API fstring string_split_next(
-    string_split_t sst, fstring __dst) {
-    uint_t i, _ok;
-    fstring _dst = __dst;
-
-    //check if reach the end of the fstring
-    if(sst->idx >= sst->srcLen) return NULL;
-
-    while(1) {
-        //a match needs a non-empty delimiter that fits entirely
-        //into what is left of the source.
-        _ok = (sst->delLen > 0
-               && sst->delLen <= sst->srcLen - sst->idx) ? 1 : 0;
-        for(i = 0; _ok == 1 && i < sst->delLen; i++) {
-            if(sst->source[sst->idx + i] != sst->delimiter[i]) {
-                _ok = 0;
-            }
-        }
-
-        //found the delimiter here:
-        //skip over it and stop, the piece is complete.
-        if(_ok == 1) {
-            sst->idx += sst->delLen;
-            break;
-        }
-
-        //copy the char to the buffer
-        *_dst++ = sst->source[sst->idx++];
-        //check if reach the end of the fstring
-        if(sst->idx >= sst->srcLen) break;
-    }
-
-    *_dst = '\0';
-    return _dst;
-}
--- a/libfriso/friso/src/tst-array.c
+++ b/libfriso/friso/src/tst-array.c
@ -1,50 +0,0 @@
-/*
- * dynamic array test program.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-/*
- * exercise the array list API in sequence: add, trim, get, set,
- *     remove and insert, printing the state after each step.
- * the command line arguments are not used.
- */
-int main(int argc, char **args) {
-
-    //create a new array list.
-    friso_array_t array = new_array_list();
-    fstring keys[] = {
-        "chenmanwen", "yangqinghua",
-        "chenxin", "luojiangyan", "xiaoyanzi", "bibi",
-        "zhangrenfang", "yangjian",
-        "liuxiao", "pankai",
-        "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
-        "caizaili", "panpan", "xiaolude", "yintanwen"
-    };
-    //idx is the position used by every get/set/remove/insert below;
-    //len is the number of entries in keys.
-    int j, idx = 2, len = sizeof(keys) / sizeof(fstring);
-
-    //fill the list with the string literals (pointers are stored as-is).
-    for(j = 0; j < len; j++) {
-        array_list_add(array, keys[j]);
-    }
-
-    //show length/allocations before and after trimming.
-    printf("length=%d, allocations=%d\n", array->length, array->allocs);
-    array_list_trim(array);
-    printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs);
-    printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
-
-    //replace the item at idx and read it back.
-    printf("\nAfter set %dth item.\n", idx);
-    array_list_set(array, idx, "chenxin__");
-    printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
-
-    //remove the item at idx; the value printed next is whatever is at idx now.
-    printf("\nAfter remove %dth item.\n", idx);
-    array_list_remove(array, idx);
-    printf("length=%d, allocations=%d\n", array->length, array->allocs);
-    printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
-
-    //insert a new item at idx and read it back.
-    printf("\nInsert a item at %dth\n", idx);
-    array_list_insert(array, idx, "*chenxin*");
-    printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
-
-    //the items are string literals, so only the list itself is freed.
-    free_array_list(array);
-
-    return 0;
-}
--- a/libfriso/friso/src/tst-friso.c
+++ b/libfriso/friso/src/tst-friso.c
@ -1,161 +0,0 @@
-/*
- * Friso test program.
- *     Of course you can make it a perfect demo for friso.
- * all threads or processes share the same friso_t,
- *     different threads/processes use different friso_task_t.
- * and you could share the friso_config_t if you wish...
- *
- * @author lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include "friso.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-//__LENGTH__ is only referenced by the commented-out
-//friso_dic_load_from_ifile() call in main below.
-#define __LENGTH__ 15
-//size in bytes of the input line buffer declared in main.
-#define __INPUT_LENGTH__ 20480
-//print the goodbye message and leave the enclosing loop.
-//NOTE: expands to a bare "break;", so it is only valid inside a loop
-//(or switch) body.
-#define ___EXIT_INFO___                    \
-    println("Thanks for trying friso.");        \
-break;
-
-//print the start-up banner, one println() per line.
-//NOTE(review): println is not defined in this file; presumably it comes
-//from one of the friso headers - confirm.
-#define ___ABOUT___                    \
-    println("+---------------------------------------------------------------+");    \
-    println("| Friso - a Chinese word segmentation writen by c.              |");    \
-    println("| bug report email - chenxin619315@gmail.com.                   |");    \
-    println("| or: visit https://github.com/lionsoul2014/friso.              |");    \
-    println("|     java version for https://github.com/lionsoul2014/jcseg    |");    \
-    println("| type 'quit' to exit the program.                              |");    \
-    println("+---------------------------------------------------------------+");
-
-/*
- * read a line from the given stream into __dst, without the '\n'.
- *
- * at most __INPUT_LENGTH__ - 1 bytes are stored, so __dst must provide
- *     at least __INPUT_LENGTH__ bytes. the rest of an over-long line is
- *     still consumed (and dropped) so that it is not returned as the
- *     next line.
- *
- * @param fp     the stream to read from.
- * @param __dst  the buffer that receives the NUL-terminated line.
- * @return __dst, or NULL when the end of the stream was reached
- *         before any byte could be stored.
- */
-static fstring getLine(FILE *fp, fstring __dst) {
-    register int c;
-    register fstring cs;
-    //the last position that may still take a content byte.
-    fstring end = __dst + __INPUT_LENGTH__ - 1;
-
-    cs = __dst;
-    while((c = getc(fp)) != EOF) {
-        if(c == '\n') break;
-        //keep draining the line, but store only what fits.
-        if(cs < end) *cs++ = c;
-    }
-    *cs = '\0';
-
-    return (c == EOF && cs == __dst) ? NULL : __dst;
-}
-
-/*static void printcode( fstring str ) {
-  int i,length;
-  length = strlen( str );
-  printf("str:length=%d\n", length );
-  for ( i = 0; i < length; i++ ) {
-  printf("%d ", str[i] );
-  }
-  putchar('\n');
-  }*/
-
-/*
- * interactive friso demo.
- * usage: friso -init <configuration file>
- *
- * initializes friso and its config from the given file, prints the
- *     banner, then reads lines from stdin and prints the tokens of each
- *     line until "quit" is entered or stdin reaches its end.
- *
- * @return always 0.
- */
-int main(int argc, char **argv) {
-
-    clock_t s_time, e_time;
-    char line[__INPUT_LENGTH__] = {0};
-    int i;
-    //mode keeps a printable default in case config->mode matches
-    //none of the cases below (printing a NULL with %s is undefined).
-    fstring __path__ = NULL, mode = "Unknown";
-
-    friso_t friso;
-    friso_config_t config;
-    friso_task_t task;
-
-    // get the lexicon directory from command line arguments.
-    // when "-init" is the last argument, argv[i + 1] is argv[argc],
-    // which is NULL, so the usage message below is printed.
-    for(i = 0; i < argc; i++) {
-        if(strcasecmp("-init", argv[i]) == 0) {
-            __path__ = argv[i + 1];
-        }
-    }
-
-    if(__path__ == NULL) {
-        println("Usage: friso -init lexicon path");
-        exit(0);
-    }
-
-    s_time = clock();
-
-    //initialize
-    friso  = friso_new();
-    config = friso_new_config();
-    /*friso_dic_t dic = friso_dic_new();
-      friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
-      friso_set_dic( friso, dic );
-      friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
-    if(friso_init_from_ifile(friso, config, __path__) != 1) {
-        printf("fail to initialize friso and config.\n");
-        goto err;
-    }
-
-    switch(config->mode) {
-    case __FRISO_SIMPLE_MODE__:
-        mode = "Simple";
-        break;
-    case __FRISO_COMPLEX_MODE__:
-        mode = "Complex";
-        break;
-    case __FRISO_DETECT_MODE__:
-        mode = "Detect";
-        break;
-    }
-
-    //friso_set_mode( config, __FRISO_DETECT_MODE__ );
-    //printf("clr_stw=%d\n", friso->clr_stw);
-    //printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" ));
-    //printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究"));
-
-    e_time = clock();
-
-    printf("Initialized in %fsec\n", (double)(e_time - s_time) / CLOCKS_PER_SEC);
-    printf("Mode: %s\n", mode);
-    printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK");
-    ___ABOUT___;
-
-    //set the task.
-    task = friso_new_task();
-
-    while(1) {
-        print("friso>> ");
-        //getLine returns NULL once stdin is exhausted; stop instead of
-        //spinning on an empty line forever.
-        if(getLine(stdin, line) == NULL) {
-            break;
-        }
-        //exit the program
-        if(strcasecmp(line, "quit") == 0) {
-            ___EXIT_INFO___
-        }
-
-        //for ( i = 0; i < 1000000; i++ ) {
-        //set the task text.
-        friso_set_text(task, line);
-        println("分词结果:");
-
-        s_time = clock();
-        //pull tokens until the segmenter reports no more.
-        while((config->next_token(friso, config, task)) != NULL) {
-            printf(
-                "%s[%d, %d, %d] ",
-                task->token->word,
-                task->token->offset,
-                task->token->length,
-                task->token->rlen
-            );
-            // printf("%s ", task->token->word);
-        }
-        //}
-        e_time = clock();
-        printf("\nDone, cost < %fsec\n", ((double)(e_time - s_time)) / CLOCKS_PER_SEC);
-
-    }
-
-    friso_free_task(task);
-
-    //error block: also the normal exit path after the loop.
-err:
-    friso_free_config(config);
-    friso_free(friso);
-
-    return 0;
-}
--- a/libfriso/friso/src/tst-hash.c
+++ b/libfriso/friso/src/tst-hash.c
@ -1,65 +0,0 @@
-/**
- * hashmap testing program
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso_API.h"
-#include <stdio.h>
-
-//print the length, size, factor and threshold fields
-//of the given hash table on one line.
-void print_hash_info(friso_hash_t _hash) {
-    printf(
-        "info:length=%d, size=%d, facotr=%f, threshold=%d\n",
-        _hash->length,
-        _hash->size,
-        _hash->factor,
-        _hash->threshold
-    );
-}
-
-/*
- * exercise the hash table API: put every name as both key and value,
- *     then check, remove and re-check each mapping, printing the table
- *     info before, in between and after.
- * the program waits for a key press (getchar) between the phases.
- * the command line arguments are not used.
- */
-int main(int argc, char **argv) {
-    friso_hash_t _hash = new_hash_table();
-    char *names[] = {
-        "陈满文", "阳清华",
-        "陈鑫", "罗江艳",
-        "小燕子", "比比",
-        "张仁芳", "阳建",
-        "陈配", "李恒",
-        "张志刚", "张怡少",
-        "阳江波", "蔡再利",
-        "阳绘章", "尹唐文",
-        "谭志鹏", "肖路德",
-        "潘凯", "刘潇",
-        "马朝辉", "张强",
-        "殷美林", "元明清",
-        "周安", "郭桥安",
-        "刘敏", "黄广华",
-        "李胜", "黄海清"
-    };
-    //char *str[] = {"陈鑫", "张仁芳", "比比"};
-    char **str = names;
-    //derive the count from the array so it follows edits to the list.
-    int j, len = (int)(sizeof(names) / sizeof(names[0]));
-
-    print_hash_info(_hash);
-    for(j = 0; j < len; j++) {
-        hash_put_mapping(_hash, names[j], names[j]);
-    }
-
-    print_hash_info(_hash);
-
-    printf("Press any key to continue.");
-    getchar();
-
-    //remove mappings
-    for(j = 0; j < len; j++) {
-        printf("Exist %s?%2d\n", str[j], hash_exist_mapping(_hash, str[j]));
-        printf("Now, remove %s\n", str[j]);
-        hash_remove_mapping(_hash, str[j]);
-        printf("Exist %s?%2d\n", str[j], hash_exist_mapping(_hash, str[j]));
-        printf("*********************************\n");
-    }
-
-    printf("Press any key to continue.");
-    getchar();
-
-    print_hash_info(_hash);
-    //free the table
-    free_hash_table(_hash, 0);
-
-    return 0;
-}
--- a/libfriso/friso/src/tst-lex.c
+++ b/libfriso/friso/src/tst-lex.c
@ -1,108 +0,0 @@
-/*
- * lex functions test program.
- *
- * @author  lionsoul<chenxin619315@gmail.com>
- */
-
-#include "friso.h"
-#include <stdio.h>
-#include <time.h>
-#include <string.h>
-
-//used in main both as the size of the input buffer _line and as the
-//last argument of every friso_dic_load() call.
-#define __LENGTH__ 15
-//print the command menu; expands to four printf statements.
-#define ___PRINT_HELP_INFO___                                   \
-    printf("1. help print the current menu.\n");                \
-    printf("2. #set set the classify of the dictionary.\n");    \
-    printf("3. other search the words in the dictionary.\n");   \
-    printf("4. quit exit the programe.\n");
-
-/*
- * interactive lexicon lookup tool.
- * loads the UTF-8 lexicon files from ../vendors/dict/UTF-8/ and then
- *     reads commands from stdin:
- *     help   print the menu,
- *     #set   read a number and use it as the lexicon type to search,
- *     quit   leave the program,
- *     other  look the word up in the current lexicon type.
- * the loop also ends when stdin reaches its end.
- * the command line arguments are not used.
- *
- * @return always 0.
- */
-int main(int argc, char **argv) {
-    lex_entry_t e;
-    int lex = __LEX_CJK_WORDS__;
-    char _line[__LENGTH__];
-    clock_t s_time, e_time;
-    friso_t friso;
-    friso_config_t config;
-
-    s_time = clock();
-    friso  = friso_new();
-    config = friso_new_config();
-    config->add_syn = 0;
-    friso->dic = friso_dic_new();
-
-    //__CJK_WORDS__
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-main.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-admin.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-chars.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-cn-mz.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-cn-place.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-company.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-festival.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-flname.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-food.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-lang.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-nation.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-net.lex", __LENGTH__);
-    friso_dic_load(friso, config, __LEX_CJK_WORDS__, "../vendors/dict/UTF-8/lex-org.lex", __LENGTH__);
-
-    //__CJK_UNITS__
-    friso_dic_load(friso, config, __LEX_CJK_UNITS__, "../vendors/dict/UTF-8/lex-units.lex", __LENGTH__);
-    //__MIX_WORDS__
-    friso_dic_load(friso, config, __LEX_ECM_WORDS__, "../vendors/dict/UTF-8/lex-ecmixed.lex", __LENGTH__);
-    //__CN_LNAME__
-    friso_dic_load(friso, config, __LEX_CN_LNAME__, "../vendors/dict/UTF-8/lex-lname.lex", __LENGTH__);
-    //__CN_SNAME__
-    friso_dic_load(friso, config, __LEX_CN_SNAME__, "../vendors/dict/UTF-8/lex-sname.lex", __LENGTH__);
-    //__CN_DNAME1__
-    friso_dic_load(friso, config, __LEX_CN_DNAME1__, "../vendors/dict/UTF-8/lex-dname-1.lex", __LENGTH__);
-    //__CN_DNAME2__
-    friso_dic_load(friso, config, __LEX_CN_DNAME2__, "../vendors/dict/UTF-8/lex-dname-2.lex", __LENGTH__);
-    //__CN_LNA__
-    friso_dic_load(friso, config, __LEX_CN_LNA__, "../vendors/dict/UTF-8/lex-ln-adorn.lex", __LENGTH__);
-
-    e_time = clock();
-    printf(
-        "Done, cost: %f sec, size=%d\n",
-        (double)(e_time - s_time) / CLOCKS_PER_SEC,
-        friso_all_dic_size(friso->dic)
-    );
-
-    while(1) {
-        printf("friso-%d>> ", lex);
-        //the field width keeps the word inside _line: __LENGTH__ (15)
-        //bytes hold 14 characters plus '\0'. keep both in sync.
-        //"%s" can only fail at the end of the input, so stop then
-        //instead of prompting forever.
-        if(scanf("%14s", _line) != 1) {
-            break;
-        }
-
-        if(strcmp(_line, "quit") == 0) {
-            break;
-        }  else if(strcmp(_line, "help") == 0) {
-            ___PRINT_HELP_INFO___
-        } else if(strcmp(_line, "#set") == 0) {
-            printf("lex_t>> ");
-            if(scanf("%d", &lex) != 1) {
-                printf("Warning: Invalid lex type input\n");
-                continue;
-            }
-        } else {
-            s_time = clock();
-            e = friso_dic_get(friso->dic, lex, _line);
-            e_time = clock();
-            if(e != NULL) {
-                printf(
-                    "word=%s, syn=%s, fre=%d, cost:%fsec\n",
-                    e->word, e->syn == NULL ? "NULL" : (char *)e->syn->items[0],
-                    e->fre,
-                    (double)(e_time - s_time) / CLOCKS_PER_SEC
-                );
-            } else {
-                printf("%s was not found.\n", _line);
-            }
-        }
-    }
-
-    // friso_dic_free( friso->dic );
-    //release the config created by friso_new_config() as well.
-    friso_free_config(config);
-    friso_free(friso);
-
-    return 0;
-}
--- a/Show More
+++ b/Show More