117 changed files with 1247775 additions and 63 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,53 +0,0 @@
-# C++ objects and libs
-*.slo
-*.lo
-*.o
-*.a
-*.la
-*.lai
-*.so
-*.so.*
-*.rc
-*.dll
-*.dylib
-
-# Qt-es
-object_script.*.Release
-object_script.*.Debug
-*_plugin_import.cpp
-/.qmake.cache
-/.qmake.stash
-*.pro.user
-*.pro.user.*
-*.qbs.user
-*.qbs.user.*
-*.moc
-moc_*.cpp
-moc_*.h
-qrc_*.cpp
-ui_*.h
-*.qmlc
-*.jsc
-Makefile*
-*build-*
-*.qm
-*.prl
-
-# Qt unit tests
-target_wrapper.*
-
-# QtCreator
-*.autosave
-
-# QtCreator Qml
-*.qmlproject.user
-*.qmlproject.user.*
-
-# QtCreator CMake
-CMakeLists.txt.user*
-
-# QtCreator 4.8< compilation database 
-compile_commands.json
-
-# QtCreator local machine specific files for imported projects
-*creator.user*
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
-[submodule "libchinese-segmentation"]
-	path = libchinese-segmentation
-	url = https://gitee.com/openkylin/chinese-segmentation.git
--- a/debian/changelog
+++ b/debian/changelog
@ -0,0 +1,291 @@
+ukui-search (4.0.2.3-ok0) yangtze; urgency=medium
+
+  * Issues:https://gitee.com/openkylin/ukui-search/issues/I7E1IK?from=project-issue
+  * 其他改动：
+    - 补充设置页藏文翻译
+    - 解决当搜索UI关闭后未正确停止插件搜索线程的问题
+    - 解决遍历搜索时文件搜索插件长时间cpu占用较高的问题
+    - 解决文件搜索接口在使用索引搜索根目录时无法搜到文件的问题
+  * 其他改动影响域：
+    - 控制面板设置页
+    - 全局搜索应用UI结果列表
+    - 文件搜索功能
+    - 文件搜索接口
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Mon, 19 Jun 2023 14:28:06 +0800
+
+ukui-search (4.0.2.2-ok0) yangtze; urgency=medium
+
+  * Issues:https://gitee.com/openkylin/ukui-search/issues/I72NP7
+  * 其他改动：无
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 07 Jun 2023 15:44:31 +0800
+
+ukui-search (4.0.2.1-ok0) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 修复添加索引目录后索引状态未更新的问题
+    - 搜索接口增加搜索结果通知机制和状态查询功能
+    - 搜索设置页面添加搜索目录时增加对无访问权限目录的过滤弹窗
+    - 优化搜索设置页面弹窗文案
+    - 修复了一个分词库的问题，该问题曾导致搜索或搜索服务在首次启动时偶现崩溃
+  * 其他改动影响域:
+    - 搜索服务接口，搜索设置页面增加搜索目录功能，搜索和搜索服务稳定性提升
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 06 Jun 2023 15:25:44 +0800
+
+ukui-search (4.0.1.5-ok0) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 解决一处潜在的崩溃问题
+    - 修复搜索插件生命周期管理内存泄漏问题
+    - 修复搜索到需要安装的应用时无法正确跳转到软件商店的问题
+    - 修复偶现刚登陆时搜索会卡顿一段时间的问题
+    - 优化黑名单配置后台逻辑
+  * 其他改动影响域:
+    - 应用搜索功能；索引服务稳定性；升级时配置同步
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 11 May 2023 18:00:59 +0800
+
+ukui-search (4.0.1.3-ok0) yangtze; urgency=medium
+
+  * 更新版本号.
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 27 Apr 2023 14:11:36 +0800
+
+ukui-search (4.0.1.2-ok0~0421) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 解决全局搜索中遍历搜索不能搜索到顶层文件夹的问题
+    - 解决索引提示弹窗会挡住模态窗口的问题
+    - 修复了一个问题，该问题曾导致添加曾经删除过的搜索目录时无法正确索引
+  * 其他改动影响域：
+    - 不开索引时的文件夹搜索功能
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 20 Apr 2023 11:40:39 +0800
+
+ukui-search (4.0.1.1-ok0~0419) yangtze; urgency=medium
+
+  * 备注:修复编译问题，重新传包
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 19 Apr 2023 10:53:46 +0800
+
+ukui-search (4.0.1.0-ok0~0418build1) yangtze; urgency=medium
+
+  * 备注:重新传包
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 18 Apr 2023 17:48:00 +0800
+
+ukui-search (4.0.1.0-ok0~0418) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 应用数据服务增加设置应用是否打开过的接口
+    - 应用数据库增加AUTO_START字段
+    - 优化所有进程的日志打印机制
+  * 其他改动影响域：
+    - 开始菜单应用列表显示；
+    - 日志现在被打印到~/.log/ukui-search/目录下
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 18 Apr 2023 17:03:50 +0800
+
+ukui-search (4.0.0.0-ok0~0413) yangtze; urgency=medium
+
+  * 备注：去除libopencv-dev依赖重新传包
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 13 Apr 2023 17:44:34 +0800
+
+ukui-search (4.0.0.0-ok0~0412build1) yangtze; urgency=medium
+
+  * 备注：重新传包
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 12 Apr 2023 18:51:21 +0800
+
+ukui-search (4.0.0.0-ok0~0412) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 优化搜索服务目录管理模块
+  * 其他改动影响域：
+    - 文件索引目录配置功能
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 12 Apr 2023 18:12:32 +0800
+
+ukui-search (3.22.5.2-ok0~0324) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 优化应用数据服务，完善数据库查询和搜索接口实现。
+    - 文件索引服务优化inotify信号处理，解决若干由于信号处理不当导致的索引不正确问题
+    - 完善搜索服务文件搜索接口
+    - 增加UI针对屏幕尺寸变化时的自动更新位置操作
+    - 提升文本内容搜索准确度
+  * 其他改动影响域:
+    - 屏幕旋转时UI位置；应用搜索功能；文件索引服务；文件内容搜索功能。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 24 Mar 2023 13:58:05 +0800
+
+ukui-search (3.22.4.2-ok0~0210) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 解决在控制面板中搜索‘搜索’插件部分设置项无法搜到的问题。
+    - 更新libukui-search的pc文件。
+    - 优化应用搜索功能；修复偶现搜索结果重复问题。
+    - 修复偶现的由于队列处理不当导致的搜索应用崩溃问题。
+    - 修复当通过弹窗打开索引时，控制面板中不显示模糊搜索开关的问题。
+  * 其他改动影响域:
+    - 应用搜索功能；控制面板设置插件；全局搜索应用稳定性。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 10 Feb 2023 11:26:15 +0800
+
+ukui-search (3.22.4.1-ok0~0105) yangtze; urgency=medium
+
+  * Issues:https://gitee.com/openkylin/ukui-search/issues/I64CCY?from=project-issue
+  * 其他改动：
+    - 增加文件监听对unmount信号的处理。
+    - 更新设置搜索接口，解决设置搜索不准确的问题。
+    - 解决在目录发生变动时，控制面板索引目录选择页面未更新的问题。
+    - 解决了一些内存泄露问题。
+    - 解决系统字体变化时UI未能及时更新的问题。
+    - 增加桌面小插件不随系统缩放更改尺寸的特性。
+  * 其他改动影响域：
+    - 全局搜索UI；文件索引功能；设置项搜索功能;桌面搜索插件;控制面板搜索设置页。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 05 Jan 2023 10:43:22 +0800
+
+ukui-search (3.22.3.1-ok0~1222) yangtze; urgency=medium
+
+  * Issues:NULL
+  * 其他改动：
+    - 解决平板模式下搜索结果列表右侧被滚动条遮挡的问题。
+    - 修复反复开关索引导致的索引异常问题（偶现打开索引失败）。
+    - 解决注册文本内容插件命令失效问题。
+    - 搜索服务应用数据库新增跟随版本更新机制。
+    - 更新README.md。
+  * 其他改动影响域：
+    - 全局搜索UI；文件索引功能；应用搜索功能；应用数据服务。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 22 Dec 2022 11:36:41 +0800
+
+ukui-search (3.22.3.0-ok0~1212) yangtze; urgency=medium
+
+  * Issues: https://gitee.com/openkylin/ukui-search/issues/I658EV?from=project-issue
+            https://gitee.com/openkylin/ukui-search/issues/I657OY?from=project-issue
+            https://gitee.com/openkylin/ukui-search/issues/I657G6?from=project-issue
+  * 其他改动：
+    - 完善汉字转拼音接口逻辑，解决多音字识别不准确的问题。
+    - 增加索引数据库版本号机制，版本号升级时将重建数据库。
+    - 修复UI最佳匹配与结果分类触摸行为不一致的问题。
+    - 增加控制面板插件模糊搜索开关，添加搜索目录功能。
+    - 解决添加和删除索引目录异常的问题。
+    - 解决wayland下失焦后再激活窗口后焦点异常问题。
+    - 增加了一个用于调试的索引状态监控页面，可通过'ukui-search-service -m'打开
+  * 其他改动影响域：
+    - 全局搜索UI；文件索引功能；控制面板插件；拼音搜索功能。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Mon, 12 Dec 2022 15:49:35 +0800
+
+ukui-search (3.22.2.1-ok0~1130) yangtze; urgency=medium
+
+  * Issues: https://gitee.com/openkylin/ukui-search/issues/I63XWF?from=project-issue
+  * 其他改动：无
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 30 Nov 2022 16:44:46 +0800
+
+ukui-search (3.22.2.0-ok0~1129build1) yangtze; urgency=medium
+
+  * 解决编译问题
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 29 Nov 2022 15:15:40 +0800
+
+ukui-search (3.22.2.0-ok0~1129) yangtze; urgency=medium
+
+  * Issues: NULL
+  * 任务号：无
+  * 其他改动：
+    - 增加搜索应用插件动态注册、注销、排序功能.
+    - 优化文件索引操作.
+    - 增加文件内容模糊搜索功能.
+  * 其他改动说明：
+    - 搜索应用插件的注册、注销与排序目前能通过命令行实现。
+    - 取消了文件索引服务的子进程，优化了文件索引的资源占用，索引
+      服务暂停后再启动采取增量更新模式。
+    - 增加了一个gsettings,用于设置文本内容的搜索模式。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 29 Nov 2022 14:47:34 +0800
+
+ukui-search (3.22.1.0-ok1~1122) yangtze; urgency=medium
+
+  * 更新版本号
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 11:26:21 +0800
+
+ukui-search (3.22.1.0-ok0~1122build2) yangtze; urgency=medium
+
+  * 更新版本号
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 11:06:40 +0800
+
+ukui-search (3.22.1.0-ok0~1122build1) yangtze; urgency=medium
+
+  * 重新传包
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 10:50:53 +0800
+
+ukui-search (3.22.1.0-ok0~1122) yangtze; urgency=medium
+
+  * Issues: https://gitee.com/openkylin/ukui-search/issues/I5XA17
+  * 任务号：无
+  * 其他改动：
+    - 修复结果列表文字显示不居中的问题.
+    - 搜索插件接口增加一个反向调用接口，应用搜索增加截图打开时隐藏主页面动作。
+    - 解决平板模式下，单击结果项直接执行打开动作，无法打开详情页的问题。
+    - Optimize the constructor and pluginUi function of search plugin for ukcc.
+    - 修复内容搜索详情页加载图片失败导致的崩溃问题.
+    - 修复控制面板搜索插件崩溃的问题.
+    - Fixed: the app-data-service crashes when the destructor is executed.
+    - 同步分词模块<更新分词基础词库内容，分词准确性提升>.
+  * 其他改动影响域：搜索UI；文本内容索引分词效果。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 10:29:34 +0800
+
+ukui-search (3.22.0.0-ok0~1021) yangtze; urgency=medium
+
+  * BUG: 无
+  * 任务号：无
+  * 其他改动：同步主线代码，增加平板桌面小插件功能
+  * 其他改动影响域：全局搜索自身，平板桌面小插件
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 21 Oct 2022 11:13:06 +0800
+
+ukui-search (3.14.0.6-ok2~0720) yangtze; urgency=medium
+
+  * BUG: 无
+  * 任务号：无
+  * 其他改动：使用kysdk接口，在wayland环境设置跳过任务栏和多任务视图显示；
+              Fix bug: The background does not follow theme changes.
+  * 其他改动影响域：搜索UI显示。
+
+ -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 20 Jul 2022 15:04:40 +0800
+
+ukui-search (3.14.0.6-ok2~0716) yangtze; urgency=medium
+
+  * build for openKylin
+  * 适配应用管理器接口
+  * 使用kysdk接口以解决wayland环境下窗口位置异常问题双标题栏问题.
+  * 修改dbus参数由qurl改为qstring
+  * Add a mainWindow switch dbus interface.
+  * sync #125632 action label's color doesn't change with the highlight color.
+  * sync #127327 adjust the highlight effect of ukcc plugin's switchbutton
+  * 处理富文本超长问题
+  * 优化关键词高亮方案，结果项显示效果适配主题框架。
+  * 修复标题栏展开置顶后，点击收回位置错误问题
+  * 增加正则表达式转义，避免偶现的搜索UI卡死现象。
+  * close-cd #127732 #127792 the ukui-search does not quit while losing focus.
+
+ -- Xie Wei <xiewei@kylinos.cn>  Sat, 18 Jun 2022 10:43:24 +0800
+
--- a/debian/control
+++ b/debian/control
@ -0,0 +1,118 @@
+Source: ukui-search
+Section: utils
+Priority: optional
+Maintainer: zhangpengfei <zhangpengfei@kylinos.cn>
+Build-Depends: debhelper-compat (=12),
+               pkgconf,
+               libgsettings-qt-dev,
+               qtbase5-dev,
+               qt5-qmake,
+               qtchooser,
+               qtscript5-dev,
+               qttools5-dev-tools,
+               libxapian-dev,
+               libquazip5-dev(>=0.7.6-6build1),
+               libglib2.0-dev,
+               libkf5windowsystem-dev,
+               libqt5x11extras5-dev,
+               libuchardet-dev,
+               libpoppler-qt5-dev,
+               libukui-log4qt-dev,
+               libqt5xdg-dev,
+               libukcc-dev,
+               libtesseract-dev,
+               libleptonica-dev,
+               libkysdk-waylandhelper-dev,
+               libkysdk-qtwidgets-dev,
+               libukui-appwidget-manager-dev,
+               libukui-appwidget-provider-dev,
+               libukui-appwidget-qmlplugin0,
+               qml-module-org-ukui-stylehelper,
+               qtdeclarative5-dev
+Standards-Version: 4.5.0
+Rules-Requires-Root: no
+Homepage: https://www.ukui.org/
+Vcs-Git:  https://gitee.com/openkylin/ukui-search.git
+Vcs-Browser: https://gitee.com/openkylin/ukui-search
+
+Package: ukui-search
+Architecture: any
+Depends: ${misc:Depends},
+         ${shlibs:Depends},
+         libukui-search2 (= ${binary:Version}),
+         ukui-search-service (= ${binary:Version}),
+Description: A user-wide desktop search
+ feature of UKUI desktop environment.
+
+Package: ukui-search-service
+Architecture: any
+Depends: ${misc:Depends},
+         ${shlibs:Depends},
+Replaces: libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
+Breaks: libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
+Description: Data service for search function in UKUI desktop environment.
+
+Package: libchinese-segmentation1
+Section: libs
+Architecture: any
+Depends: ${misc:Depends},
+         ${shlibs:Depends},
+         libchinese-segmentation-common (= ${source:Version}),
+Description: Libraries for chinese-segmentation
+ .
+ This package contains a runtime library needed by 
+ ukui-search's file index function.
+
+Package: libchinese-segmentation-common
+Architecture: any
+Depends: ${misc:Depends},
+Replaces: libchinese-segmentation0,libchinese-segmentation1 (<=4.0.1.1-ok0~0419)
+Breaks: libchinese-segmentation0,libchinese-segmentation1 (<=4.0.1.1-ok0~0419)
+Description: Extra files for chinese-segmentation
+ .
+ This package contains dicts used by chinese-segmentation.
+
+Package: libchinese-segmentation-dev
+Section: libdevel
+Architecture: any
+Depends: ${misc:Depends},
+         libchinese-segmentation1 (= ${binary:Version}),
+Description: Libraries for chinese-segmentation (development files).
+
+Package: libukui-search2
+Section: libs
+Architecture: any
+Depends: ${misc:Depends},
+         ${shlibs:Depends},
+         libchinese-segmentation1 (= ${binary:Version}),
+         libukui-search-common (= ${source:Version}),
+         ukui-search-systemdbus (= ${binary:Version})
+Provides: libukui-search,
+Description: Libraries for ukui-search. 
+ .
+ This package contains a runtime library needed by
+ ukui-search and its extensions.
+
+Package: libukui-search-common
+Architecture: any
+Depends: ${misc:Depends},
+Replaces: ukui-search (<= 3.14.0.6-ok2~0720),libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
+Breaks: ukui-search (<= 3.14.0.6-ok2~0720),libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
+Description: Extra files for libukui-search
+ .
+ This package contains some extra files for libukui-search,
+ for now, translation files only.
+
+Package: libukui-search-dev
+Section: libdevel
+Architecture: any
+Depends: ${misc:Depends},
+         libukui-search2 (= ${binary:Version}),
+Description: Libraries for ukui-search (development files).
+
+Package: ukui-search-systemdbus
+Architecture: any
+Depends: ${shlibs:Depends}, 
+         ${misc:Depends},
+Description: System bus interface to modify max_user_watches
+ permanently.
--- a/debian/copyright
+++ b/debian/copyright
@ -0,0 +1,67 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+
+Files: *
+Copyright: 2020, KylinSoft Co., Ltd.
+License: GPL-3+
+
+Files: libchinese-segmentation/cppjieba/limonp/Md5.hpp
+Copyright: 1991, 1992, RSA Data Security, Inc. Created 1991
+License: NTP
+
+Files: src/singleapplication/*
+Copyright: 2013, Digia Plc and/or its subsidiary(-ies)
+License: BSD-3-clause
+
+Files: ukuisearch-systemdbus/*
+Copyright: 2019, Tianjin KYLIN Information Technology Co., Ltd.
+License: GPL-2+
+
+License: BSD-3-clause
+ This software is Copyright (c) 2021 by foo.
+ This is free software, licensed under:
+ The (three-clause) BSD License
+ The BSD License
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution. 
+ * Neither the name of foo nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission. 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+License: GPL-2+
+ This software is Copyright (c) 2021 by foo.
+ This is free software, licensed under:
+ The GNU General Public License, Version 2, June 1991
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 dated June, 1991, or (at
+ your option) any later version.
+ On Debian systems, the complete text of version 2 of the GNU General
+ Public License can be found in '/usr/share/common-licenses/GPL-2'.
+
+License: GPL-3+
+ This software is Copyright (c) 2021 by foo.
+ This is free software, licensed under:
+ The GNU General Public License, Version 3, June 2007
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 3 dated June, 2007, or (at
+ your option) any later version.
+ On Debian systems, the complete text of version 3 of the GNU General
+ Public License can be found in '/usr/share/common-licenses/GPL-3'.
--- a/debian/libchinese-segmentation-common.install
+++ b/debian/libchinese-segmentation-common.install
@ -0,0 +1,2 @@
+/usr/share/ukui-search/res/dict/*.utf8
+/usr/share/ukui-search/res/dict/*.txt
--- a/debian/libchinese-segmentation-dev.install
+++ b/debian/libchinese-segmentation-dev.install
@ -0,0 +1,3 @@
+usr/include/chinese-seg/*
+usr/lib/*/pkgconfig/chinese-segmentation.pc
+usr/lib/*/libchinese-segmentation.so
--- a/debian/libchinese-segmentation1.install
+++ b/debian/libchinese-segmentation1.install
@ -0,0 +1 @@
+usr/lib/*/libchinese-segmentation.so.*
--- a/debian/libukui-search-common.install
+++ b/debian/libukui-search-common.install
@ -0,0 +1 @@
+libsearch/.qm/*.qm usr/share/ukui-search/translations
--- a/debian/libukui-search-dev.install
+++ b/debian/libukui-search-dev.install
@ -0,0 +1,3 @@
+usr/include/ukui-search/*
+usr/lib/*/pkgconfig/ukui-search.pc
+usr/lib/*/libukui-search.so
--- a/debian/libukui-search2.install
+++ b/debian/libukui-search2.install
@ -0,0 +1 @@
+usr/lib/*/libukui-search.so.*
--- a/debian/rules
+++ b/debian/rules
@ -0,0 +1,25 @@
+#!/usr/bin/make -f
+# See debhelper(7) (uncomment to enable)
+# output every command that modifies files on the build system.
+#export DH_VERBOSE = 1
+
+
+# see FEATURE AREAS in dpkg-buildflags(1)
+# export QT_SELECT=5
+
+# see ENVIRONMENT in dpkg-buildflags(1)
+# package maintainers to append CFLAGS
+#export DEB_CFLAGS_MAINT_APPEND  = -Wall -pedantic
+# package maintainers to append LDFLAGS
+#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
+
+
+%:
+	dh $@ 
+
+
+# dh_make generated override targets
+# This is example for Cmake (See https://bugs.debian.org/641051 )
+#override_dh_auto_configure:
+#	dh_auto_configure -- #	-DCMAKE_LIBRARY_PATH=$(DEB_HOST_MULTIARCH)
+
--- a/debian/source/format
+++ b/debian/source/format
@ -0,0 +1 @@
+3.0 (native)
--- a/debian/ukui-search-service.install
+++ b/debian/ukui-search-service.install
@ -0,0 +1,9 @@
+usr/bin/ukui-search-service
+usr/bin/ukui-search-app-data-service
+usr/bin/ukui-search-service-dir-manager
+etc/xdg/autostart/ukui-search-service-dir-manager.desktop
+etc/xdg/autostart/ukui-search-app-data-service.desktop
+etc/xdg/autostart/ukui-search-service.desktop
+usr/share/dbus-1/services/com.ukui.search.appdb.service
+usr/share/dbus-1/services/com.ukui.search.fileindex.service
+usr/share/glib-2.0/schemas/org.ukui.search.data.gschema.xml
--- a/debian/ukui-search-systemdbus.install
+++ b/debian/ukui-search-systemdbus.install
@ -0,0 +1,3 @@
+/usr/share/dbus-1/system-services/com.ukui.search.qt.systemdbus.service
+/etc/dbus-1/system.d/com.ukui.search.qt.systemdbus.conf
+/usr/bin/ukui-search-systemdbus
--- a/debian/ukui-search.install
+++ b/debian/ukui-search.install
@ -0,0 +1,16 @@
+usr/bin/ukui-search
+etc/xdg/autostart/ukui-search.desktop
+usr/share/applications/ukui-search-menu.desktop
+frontend/.qm/zh_CN.qm usr/share/ukui-search/translations
+frontend/.qm/bo_CN.qm usr/share/ukui-search/translations
+frontend/.qm/tr.qm usr/share/ukui-search/translations
+usr/share/glib-2.0/schemas/org.ukui.log4qt.ukui-search.gschema.xml
+usr/lib/*/ukui-control-center/*
+usr/share/ukui-search/search-ukcc-plugin/translations/*
+search-ukcc-plugin/.qm/*.qm usr/share/ukui-search/search-ukcc-plugin/translations
+usr/share/ukui-search/search-ukcc-plugin/image/*
+
+usr/share/dbus-1/services/org.ukui.appwidget.provider.search.service
+/usr/share/appwidget/*
+frontend/.qm/search_bo_CN.qm usr/share/appwidget/translations
+frontend/.qm/search_zh_CN.qm usr/share/appwidget/translations
--- a/1
+++ b/1
@ -1 +0,0 @@
-Subproject commit 4734827d7c31936f1485e4513316b05cb7c8714f
--- a/libchinese-segmentation/LICENSE
+++ b/libchinese-segmentation/LICENSE
@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- a/libchinese-segmentation/README.md
+++ b/libchinese-segmentation/README.md
@ -0,0 +1,170 @@
+# chinese-segmentation
+
+#### 介绍
+libchinese-segmentation工程以单例的形式分别提供了中文分词、汉字转拼音和中文繁体简体转换功能。
+
+接口文件分别为:
+chinese-segmentation.h
+libchinese-segmentation_global.h
+common-struct.h
+
+hanzi-to-pinyin.h
+pinyin4cpp-common.h
+
+Traditional-to-Simplified.h
+安装路径:/usr/include/chinese-seg
+
+#### 使用说明
+
+其中中文分词相关功能由chinese-segmentation.h提供接口，主要包括以下功能函数：
+
+```
+   static ChineseSegmentation *getInstance();//全局单例
+     /**
+     * @brief ChineseSegmentation::callSegment
+     * 调用extractor进行关键词提取，先使用Mix方式初步分词，再使用Idf词典进行关键词提取，只包含两字以上关键词
+     *
+     * @param sentence 要提取关键词的句子
+     * @return vector<KeyWord> 存放提取后关键词的信息的容器
+     */
+    vector<KeyWord> callSegment(const string &sentence);
+    vector<KeyWord> callSegment(QString &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutStr
+     * 使用Mix方法进行分词，即先使用最大概率法MP初步分词，再用隐式马尔科夫模型HMM进一步分词，可以准确切出词典已有词和未登录词，结果比较准确
+     *
+     * @param sentence 要分词的句子
+     * @return vector<string> 只存放分词后每个词的内容的容器
+     */
+    vector<string> callMixSegmentCutStr(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutWord
+     * 和callMixSegmentCutStr功能相同
+     * @param str 要分词的句子
+     * @return vector<Word> 存放分词后每个词所有信息的容器
+     */
+    vector<Word> callMixSegmentCutWord(const string& str);
+
+    /**
+     * @brief ChineseSegmentation::lookUpTagOfWord
+     * 查询word的词性
+     * @param word 要查询词性的词
+     * @return string word的词性
+     */
+    string lookUpTagOfWord(const string& word);
+
+    /**
+     * @brief ChineseSegmentation::getTagOfWordsInSentence
+     * 使用Mix分词后获取每个词的词性
+     * @param sentence 要分词的句子
+     * @return vector<pair<string, string>> 分词后的每个词的内容(first)和其对应的词性(second)
+     */
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callFullSegment
+     * 使用Full进行分词，Full会切出字典里所有的词。
+     * @param sentence 要分词的句子
+     * @return vector<Word> 存放分词后每个词所有信息的容器
+     */
+    vector<Word> callFullSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callQuerySegment
+     * 使用Query进行分词，即先使用Mix，对于长词再用Full，结果最精确，但词的数量也最大
+     * @param sentence 要分词的句子
+     * @return vector<Word> 存放分词后每个词所有信息的容器
+     */
+    vector<Word> callQuerySegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callHMMSegment
+     * 使用隐式马尔科夫模型HMM进行分词
+     * @param sentence 要分词的句子
+     * @return vector<Word> 存放分词后每个词所有信息的容器
+     */
+    vector<Word> callHMMSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMPSegment
+     * 使用最大概率法MP进行分词
+     * @param sentence 要分词的句子
+     * @return vector<Word> 存放分词后每个词所有信息的容器
+     */
+    vector<Word> callMPSegment(const string& sentence);
+
+```
+
+汉字转拼音相关功能由hanzi-to-pinyin.h提供接口，主要包括以下功能函数：
+
+```
+    static HanZiToPinYin * getInstance();//全局单例
+
+    /**
+     * @brief HanZiToPinYin::isMultiTone 判断是否为多音字/词/句
+     * @param word 要判断的字/词/句
+     * @return bool 不是返回false
+     */
+    bool isMultiTone(string &word);
+    bool isMultiTone(string &&word);
+    bool isMultiTone(const string &word);
+    bool isMultiTone(const string &&word);
+
+    /**
+     * @brief HanZiToPinYin::contains 查询某个字/词/句是否有拼音（是否在数据库包含）
+     * @param word 要查询的字/词/句
+     * @return bool 数据库不包含返回false
+     */
+    bool contains(string &word);
+
+    /**
+     * @brief HanZiToPinYin::getResults 获取某个字/词/句的拼音
+     * @param word 要获取拼音的字/词/句
+     * @param results word的拼音列表（有可能多音字），每次调用results会被清空
+     * @return int 获取到返回0，否则返回-1
+     */
+    int getResults(string word, QStringList &results);
+
+    /**
+     * @brief setConfig 设置HanZiToPinYin的各项功能，详见pinyin4cpp-common.h
+     * @param dataStyle 返回数据风格，默认default
+     * @param segType 是否启用分词，默认启用
+     * @param polyphoneType 是否启用多音字，默认不启用
+     * @param processType 无拼音数据处理模式，默认default
+     */
+    void setConfig(PinyinDataStyle dataStyle,SegType segType,PolyphoneType polyphoneType,ExDataProcessType processType);
+
+```
+
+中文繁体转简体相关功能由Traditional-to-Simplified.h提供接口，主要包括以下功能函数：
+
+```
+    static Traditional2Simplified * getInstance();//全局单例
+    /**
+     * @brief Traditional2Simplified::isTraditional 判断是否为繁体字，是则返回true
+     * @param oneWord 要判断的字
+     * @return bool 不是返回false
+     */
+    bool isTraditional(string &oneWord);
+
+    /**
+     * @brief Traditional2Simplified::getResults 转换某个字/词/句的繁体字
+     * @param words 要转换为简体中文的字/词/句
+     * @return words 的简体中文结果
+     */
+    string getResults(string words);
+
+```
+
+除此之外工程中提供了测试程序位于chinese-segmentation/test，运行界面如下：
+![输入图片说明](https://foruda.gitee.com/images/1682048388802220746/245a2ec3_8021248.png "image.png")
+
+#### 参与贡献
+
+1.  Fork 本仓库
+2.  新建分支
+3.  提交代码
+4.  新建 Pull Request
+
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri
@ -0,0 +1,10 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/Traditional2Simplified_trie.h
+
+SOURCES += \
+    $$PWD/Traditional2Simplified_trie.cpp
+
+# Anchor the dictionary to this .pri's directory, like the entries above, so
+# the path resolves no matter which project file includes this fragment.
+DISTFILES += \
+    $$PWD/dict/TraditionalChineseSimplifiedDict.txt
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.cpp
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.cpp
@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2023, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include "Traditional2Simplified_trie.h"
+
+// Convenience constructor: uses TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH as the
+// only dictionary source and delegates to the path-list constructor, which
+// initialises StorageBase with both arguments and then calls Init().
+Traditional2SimplifiedTrie::Traditional2SimplifiedTrie(string dat_cache_path)
+    : Traditional2SimplifiedTrie(vector<string>{TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH}, dat_cache_path)
+{
+}
+
+// Builds the converter from caller-supplied dictionary source paths.
+// Both arguments are passed to StorageBase unchanged; the inherited Init() is
+// then invoked to finish setup.
+Traditional2SimplifiedTrie::Traditional2SimplifiedTrie(const vector<string> file_paths, string dat_cache_path)
+    : StorageBase<char, false, CacheFileHeaderBase>(file_paths, dat_cache_path)
+{
+    Init();
+}
+
+// Reports whether `word` has a dictionary entry, i.e. whether Find() returns a
+// non-empty value for it.
+bool Traditional2SimplifiedTrie::IsTraditional(const string &word)
+{
+    return !Find(word).empty();
+}
+
+// Builds the cache file for the dictionary and publishes it via tryRename().
+//
+// Layout written: CacheFileHeaderBase, the value strings emitted by LoadDict(),
+// then the trie array. The data goes to a mkstemp() sibling of `dat_cache_file`
+// first and is only handed to tryRename() when every write succeeded, so a
+// failed build never replaces the cache with a partial file.
+//
+// @param dat_cache_file  final path of the cache file
+// @param md5             string copied into the header's md5_hex field; expected
+//                        to be exactly sizeof(header.md5_hex) bytes long
+void Traditional2SimplifiedTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    CacheFileHeaderBase header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    // Never copy more than the field can hold, even when asserts are compiled out.
+    const size_t md5_len = md5.size() < sizeof(header.md5_hex) ? md5.size() : sizeof(header.md5_hex);
+    memcpy(&header.md5_hex[0], md5.c_str(), md5_len);
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    // umask() is process-wide: restore the previous mask once the file exists.
+    const mode_t previous_mask = umask(S_IWGRP | S_IWOTH);
+    const int fd = mkstemp(&tmp_filepath[0]);
+    umask(previous_mask);
+    assert(fd >= 0);
+    if (fd < 0)
+        return; // release builds: nothing was created, nothing to clean up
+    fchmod(fd, 0644);
+
+    write_bytes = write(fd, (const char *)&header, sizeof(CacheFileHeaderBase));
+
+    this->LoadDict(fd, write_bytes, offset, elements_num);
+
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // The three ints that follow md5_hex are only known now, so seek back and
+    // patch them in (same order as before: elements_num, offset, trie size).
+    data_trie_size = this->GetDataTrieSize();
+    const int header_fields[] = {elements_num, offset, data_trie_size};
+    const bool header_ok =
+        lseek(fd, sizeof(header.md5_hex), SEEK_SET) != static_cast<off_t>(-1) &&
+        write(fd, header_fields, sizeof(header_fields)) == static_cast<ssize_t>(sizeof(header_fields));
+
+    close(fd);
+
+    // A failed write returns -1, which makes the running total disagree here.
+    const bool sizes_match =
+        static_cast<size_t>(write_bytes) == sizeof(CacheFileHeaderBase) + offset + this->GetDataTrieTotalSize();
+    assert(sizes_match);
+    if (!header_ok || !sizes_match) {
+        unlink(tmp_filepath.c_str()); // do not publish or leak a truncated cache
+        return;
+    }
+
+    tryRename(tmp_filepath, dat_cache_file);
+}
+
+// Looks up `key` by exact match and returns the string stored at the matched
+// element offset; returns an empty string when the search result is negative.
+string Traditional2SimplifiedTrie::Find(const string &key)
+{
+    const int element_index = ExactMatchSearch(key.c_str(), key.size());
+    return element_index < 0 ? string() : string(&GetElementPtr()[element_index]);
+}
+
+// Reads TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH line by line. Blank lines,
+// lines starting with '#', and lines that do not split into exactly two
+// ':'-separated fields are ignored. For every accepted line the first field is
+// registered in the trie at the current `offset`, and the second field
+// (including its terminating NUL) is appended to `fd`.
+//
+// @param fd            descriptor the value strings are written to
+// @param write_bytes   running total, increased by each write() return value
+// @param offset        running byte offset of the next value string
+// @param elements_num  incremented once per accepted line
+void Traditional2SimplifiedTrie::LoadDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dict_stream(TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH);
+    vector<string> fields;
+    string entry;
+
+    while (getline(dict_stream, entry)) {
+        const bool skippable = entry.empty() || limonp::StartsWith(entry, "#");
+        if (skippable)
+            continue;
+
+        limonp::Split(entry, fields, ":");
+        if (fields.size() == 2) {
+            const string &lookup_key = fields[0];
+            const string &stored_value = fields[1];
+            const size_t stored_len = stored_value.size() + 1; // keep the trailing '\0'
+
+            Update(lookup_key.c_str(), lookup_key.size(), offset);
+            offset += stored_len;
+            ++elements_num;
+            write_bytes += write(fd, stored_value.c_str(), stored_len);
+        }
+    }
+}
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.h
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.h
@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2023, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef Traditional2SimplifiedTrie_H
+#define Traditional2SimplifiedTrie_H
+
+#include "storage-base.hpp"
+
+// Path of the dictionary text file that LoadDict() opens.
+const char * const  TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH = "/usr/share/ukui-search/res/dict/TraditionalChineseSimplifiedDict.txt";
+
+/**
+ * @brief String-to-string lookup table built on StorageBase<char, false, CacheFileHeaderBase>.
+ *
+ * NOTE(review): judging by the names, keys are traditional Chinese characters
+ * and values their simplified forms — confirm against the dictionary file.
+ */
+class Traditional2SimplifiedTrie : public StorageBase<char, false, CacheFileHeaderBase>
+{
+public:
+    // NOTE(review): constructor bodies are not in this header; dat_cache_path
+    // presumably names the cache file to use — confirm in the .cpp.
+    Traditional2SimplifiedTrie(string dat_cache_path = "");
+    Traditional2SimplifiedTrie(const vector<string> file_paths, string dat_cache_path = "");
+    // Overrides the StorageBase hook; implemented in the .cpp.
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
+    // Exact-match lookup: returns the stored string, or an empty string when key is absent.
+    string Find(const string &key);
+    // NOTE(review): implementation not visible here; presumably true when word is a key of this table.
+    bool IsTraditional(const string &word);
+
+private:
+    // Reads the dictionary file, inserts each key into the trie and writes each
+    // value to fd, updating write_bytes, offset and elements_num as it goes.
+    void LoadDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
+};
+
+#endif // Traditional2SimplifiedTrie_H
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/dict/TraditionalChineseSimplifiedDict.txt
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/dict/TraditionalChineseSimplifiedDict.txt
--- a/libchinese-segmentation/Traditional-to-Simplified-private.h
+++ b/libchinese-segmentation/Traditional-to-Simplified-private.h
@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2023, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef Traditional2SimplifiedPRIVATE_H
+#define Traditional2SimplifiedPRIVATE_H
+
+#include <QtCore/qglobal.h>
+#include <QHash>
+#include "Traditional-to-Simplified.h"
+#include "Traditional2Simplified_trie.h"
+
+using namespace std;
+
+/**
+ * @brief Implementation object behind Traditional2Simplified; owns the lookup trie.
+ */
+class TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Traditional2SimplifiedPrivate
+{
+public:
+    // parent is stored in q; the trie member is default-constructed.
+    Traditional2SimplifiedPrivate(Traditional2Simplified *parent = nullptr);
+    ~Traditional2SimplifiedPrivate();
+
+public:
+    // Forwards to Traditional2SimplifiedTrie::IsTraditional().
+    bool isTraditional(string &word) {return m_Traditional2SimplifiedTrie.IsTraditional(word);}
+
+    // Returns words with every character that has a trie entry replaced by that entry.
+    string getResults(string words);
+
+private:
+
+    // Back-pointer to the public object; nullptr when none was supplied.
+    Traditional2Simplified *q = nullptr;
+    // Lookup table queried by isTraditional() and getResults().
+    Traditional2SimplifiedTrie m_Traditional2SimplifiedTrie;
+};
+#endif // Traditional2SimplifiedPRIVATE_H
--- a/libchinese-segmentation/Traditional-to-Simplified.cpp
+++ b/libchinese-segmentation/Traditional-to-Simplified.cpp
@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2023, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include <mutex>
+#include <cctype>
+#include "Traditional-to-Simplified.h"
+#include "Traditional-to-Simplified-private.h"
+#include "cppjieba/Unicode.hpp"
+
+// Singleton instance handed out by getInstance(); nullptr until the first call.
+Traditional2Simplified * Traditional2Simplified::g_Traditional2SimplifiedManager = nullptr;
+// Guards the one-time creation of the singleton in getInstance().
+std::once_flag g_Traditional2SimplifiedSingleFlag;
+
+/**
+ * @brief Convert @p words character by character using the trie.
+ * @param words Text to convert (a single character, a word or a sentence).
+ * @return The converted text; characters without a trie entry are kept as they are.
+ */
+string Traditional2SimplifiedPrivate::getResults(string words)
+{
+    if (words.empty()) {
+        return words;
+    }
+
+    // Single character: one lookup, falling back to the input itself.
+    if (cppjieba::IsSingleWord(words)) {
+        const string converted = m_Traditional2SimplifiedTrie.Find(words);
+        return converted.empty() ? words : converted;
+    }
+
+    // Several characters: look up every rune on its own.
+    string converted;
+    cppjieba::RuneStrArray runes;
+    cppjieba::DecodeRunesInString(words, runes);
+    for (auto rune = runes.begin(); rune != runes.end(); ++rune) {
+        const string character = cppjieba::GetStringFromRunes(words, rune, rune);
+        const string simplified = m_Traditional2SimplifiedTrie.Find(character);
+        // No entry for this character: keep the original bytes.
+        converted.append(simplified.empty() ? character : simplified);
+    }
+    return converted;
+}
+
+// Stores the back-pointer; the trie member is default-constructed.
+Traditional2SimplifiedPrivate::Traditional2SimplifiedPrivate(Traditional2Simplified *parent) : q(parent)
+{
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+Traditional2SimplifiedPrivate::~Traditional2SimplifiedPrivate()
+{
+}
+
+/**
+ * @brief Return the process-wide instance, creating it on the first call.
+ *
+ * Creation runs under call_once with g_Traditional2SimplifiedSingleFlag, so the
+ * object is allocated exactly once.
+ */
+Traditional2Simplified * Traditional2Simplified::getInstance()
+{
+    call_once(g_Traditional2SimplifiedSingleFlag, []() {
+        g_Traditional2SimplifiedManager = new Traditional2Simplified;
+    });
+    return g_Traditional2SimplifiedManager;
+}
+
+// Forwards to Traditional2SimplifiedPrivate::isTraditional() on the d-pointer.
+bool Traditional2Simplified::isTraditional(string &oneWord)
+{
+    return d->isTraditional(oneWord);
+}
+
+// Forwards to Traditional2SimplifiedPrivate::getResults() on the d-pointer.
+string Traditional2Simplified::getResults(string words)
+{
+    return d->getResults(words);
+}
+
+// Allocates the implementation object; no parent is passed, so its back-pointer stays nullptr.
+Traditional2Simplified::Traditional2Simplified() : d(new Traditional2SimplifiedPrivate)
+{
+}
--- a/libchinese-segmentation/Traditional-to-Simplified.h
+++ b/libchinese-segmentation/Traditional-to-Simplified.h
@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2023, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef Traditional2Simplified_H
+#define Traditional2Simplified_H
+
+#include <QtCore/qglobal.h>
+#include <string>
+#define TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Q_DECL_IMPORT
+
+using namespace std;
+
+class Traditional2SimplifiedPrivate;
+/**
+ * @brief Singleton facade for traditional-to-simplified Chinese conversion.
+ *
+ * All work is delegated to the Traditional2SimplifiedPrivate object held in d.
+ */
+class TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Traditional2Simplified
+{
+public:
+    // Returns the shared instance.
+    static Traditional2Simplified * getInstance();
+
+public:
+    /**
+     * @brief Traditional2Simplified::isTraditional Check whether a character is a traditional Chinese character.
+     * @param oneWord The character to check
+     * @return bool true if it is a traditional character, false otherwise
+     */
+    bool isTraditional(string &oneWord);
+
+    /**
+     * @brief Traditional2Simplified::getResults Convert the traditional characters of a character/word/sentence.
+     * @param words The character/word/sentence to convert to simplified Chinese
+     * @return The simplified Chinese form of words
+     */
+    string getResults(string words);
+
+protected:
+    // Construction and destruction are restricted; copying is disabled.
+    Traditional2Simplified();
+    ~Traditional2Simplified();
+    Traditional2Simplified(const Traditional2Simplified&) = delete;
+    Traditional2Simplified& operator =(const Traditional2Simplified&) = delete;
+private:
+    // Instance returned by getInstance().
+    static Traditional2Simplified *g_Traditional2SimplifiedManager;
+    // Implementation object.
+    Traditional2SimplifiedPrivate *d = nullptr;
+};
+
+#endif // Traditional2Simplified_H
--- a/libchinese-segmentation/chinese-segmentation-private.h
+++ b/libchinese-segmentation/chinese-segmentation-private.h
@ -0,0 +1,34 @@
+#ifndef CHINESESEGMENTATIONPRIVATE_H
+#define CHINESESEGMENTATIONPRIVATE_H
+
+#include "chinese-segmentation.h"
+#include "cppjieba/Jieba.hpp"
+#include "cppjieba/KeywordExtractor.hpp"
+
+/**
+ * @brief Implementation object behind ChineseSegmentation; wraps a cppjieba::Jieba instance.
+ */
+class ChineseSegmentationPrivate
+{
+public:
+    // Creates the Jieba instance; parent is stored as the back-pointer.
+    explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
+    // Deletes the Jieba instance.
+    ~ChineseSegmentationPrivate();
+    // Keyword extraction through the Jieba extractor.
+    vector<KeyWord> callSegment(const string& sentence);
+    // Same, but first rewrites sentence in place (tab and full-width ',' / '.' become spaces).
+    vector<KeyWord> callSegment(QString& sentence);
+
+    // Jieba::Cut returning only the word strings.
+    vector<string> callMixSegmentCutStr(const string& sentence);
+    // Jieba::Cut returning Word records.
+    vector<Word> callMixSegmentCutWord(const string& sentence);
+    // Jieba::LookupTag for a single word.
+    string lookUpTagOfWord(const string& word);
+    // Jieba::Tag: (word, tag) pairs for the sentence.
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    // Jieba::CutAll.
+    vector<Word> callFullSegment(const string& sentence);
+
+    // Jieba::CutForSearch.
+    vector<Word> callQuerySegment(const string& sentence);
+
+    // Jieba::CutHMM.
+    vector<Word> callHMMSegment(const string& sentence);
+
+    // Jieba::CutSmall with a maximum word length of 512.
+    vector<Word> callMPSegment(const string& sentence);
+
+private:
+    // Owned segmenter; created in the constructor, deleted in the destructor.
+    cppjieba::Jieba *m_jieba;
+    // Back-pointer to the public object; nullptr when none was supplied.
+    ChineseSegmentation *q = nullptr;
+};
+
+#endif // CHINESESEGMENTATIONPRIVATE_H
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2020, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: zhangzihao <zhangzihao@kylinos.cn>
+ * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
+ *
+ */
+#include "chinese-segmentation.h"
+#include "chinese-segmentation-private.h"
+
+/**
+ * @brief Create the Jieba segmenter used by every call* method.
+ *
+ * The HMM model and stop-word paths are fixed here; DICT_PATH, USER_DICT_PATH
+ * and IDF_DICT_PATH are taken from declarations outside this function.
+ *
+ * @param parent Public object stored as the back-pointer.
+ */
+ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
+{
+    const char * const hmmModelPath = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
+    const char * const stopWordsPath = "/usr/share/ukui-search/res/dict/stop_words.utf8";
+
+    m_jieba = new cppjieba::Jieba(DICT_PATH, hmmModelPath, USER_DICT_PATH, IDF_DICT_PATH, stopWordsPath, "");
+}
+
+// Releases the Jieba instance and clears the pointer.
+ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
+    delete m_jieba; // deleting a null pointer is a no-op, so no check is needed
+    m_jieba = nullptr;
+}
+
+/**
+ * @brief Extract keywords from @p sentence with the Jieba keyword extractor.
+ * @param sentence Text to analyse.
+ * @return The keywords produced by the extractor.
+ */
+vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
+    // -1 converted to size_t is the largest representable count
+    // (presumably "no limit" for the extractor — confirm).
+    const size_t topk = static_cast<size_t>(-1);
+    vector<KeyWord> keywords;
+    m_jieba->extractor.Extract(sentence, keywords, topk);
+    return keywords;
+}
+
+/**
+ * @brief Extract keywords from a QString after normalising separators.
+ *
+ * @p sentence is modified in place: tabs become one space and the full-width
+ * comma / full stop become three spaces each. At most the first 20480000
+ * characters are handed to the extractor.
+ *
+ * @param sentence Text to analyse; rewritten by this call.
+ * @return The keywords produced by the extractor.
+ */
+vector<KeyWord> ChineseSegmentationPrivate::callSegment(QString &sentence) {
+    sentence.replace("\t", " ");
+    // "\xEF\xBC\x8C" is the full-width comma and "\xE3\x80\x82" the full-width full stop;
+    // three spaces replace each of them so the offset information is preserved.
+    sentence.replace("\xEF\xBC\x8C", "   ");
+    sentence.replace("\xE3\x80\x82", "   ");
+
+    const size_t topk = static_cast<size_t>(-1);
+    vector<KeyWord> keywords;
+    m_jieba->extractor.Extract(sentence.left(20480000).toStdString(), keywords, topk);
+    return keywords;
+}
+
+// Runs Jieba::Cut on sentence and returns the resulting word strings.
+vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
+{
+    vector<string> pieces;
+    m_jieba->Cut(sentence, pieces);
+    return pieces;
+}
+
+// Runs Jieba::Cut on sentence and returns the resulting Word records.
+vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
+{
+    vector<Word> pieces;
+    m_jieba->Cut(sentence, pieces);
+    return pieces;
+}
+
+// Returns the tag Jieba::LookupTag reports for word.
+string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
+{
+    return m_jieba->LookupTag(word);
+}
+
+// Runs Jieba::Tag on sentence and returns the (word, tag) pairs it produced.
+vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
+{
+    vector<pair<string, string>> taggedWords;
+    m_jieba->Tag(sentence, taggedWords);
+    return taggedWords;
+}
+
+// Runs Jieba::CutAll on sentence and returns the resulting Word records.
+vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
+{
+    vector<Word> pieces;
+    m_jieba->CutAll(sentence, pieces);
+    return pieces;
+}
+
+// Runs Jieba::CutForSearch on sentence and returns the resulting Word records.
+vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
+{
+    vector<Word> pieces;
+    m_jieba->CutForSearch(sentence, pieces);
+    return pieces;
+}
+
+// Runs Jieba::CutHMM on sentence and returns the resulting Word records.
+vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
+{
+    vector<Word> pieces;
+    m_jieba->CutHMM(sentence, pieces);
+    return pieces;
+}
+
+// Runs Jieba::CutSmall on sentence with a maximum word length of 512.
+vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
+{
+    vector<Word> pieces;
+    size_t wordLengthLimit = 512;
+    m_jieba->CutSmall(sentence, pieces, wordLengthLimit);
+    return pieces;
+}
+
+// Returns the shared instance; it is allocated when the function-local static is first initialised.
+ChineseSegmentation *ChineseSegmentation::getInstance()
+{
+    static ChineseSegmentation *instance = new ChineseSegmentation;
+    return instance;
+}
+
+// Forwards to ChineseSegmentationPrivate::callSegment(const string&).
+vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
+{
+    return d->callSegment(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callSegment(QString&), which rewrites sentence in place.
+vector<KeyWord> ChineseSegmentation::callSegment(QString &sentence)
+{
+    return d->callSegment(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callMixSegmentCutStr().
+vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
+{
+    return d->callMixSegmentCutStr(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callMixSegmentCutWord().
+vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
+{
+    return d->callMixSegmentCutWord(str);
+}
+
+// Forwards to ChineseSegmentationPrivate::lookUpTagOfWord().
+string ChineseSegmentation::lookUpTagOfWord(const string &word)
+{
+    return d->lookUpTagOfWord(word);
+}
+
+// Forwards to ChineseSegmentationPrivate::getTagOfWordsInSentence().
+vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
+{
+    return d->getTagOfWordsInSentence(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callFullSegment().
+vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
+{
+    return d->callFullSegment(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callQuerySegment().
+vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
+{
+    return d->callQuerySegment(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callHMMSegment().
+vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
+{
+    return d->callHMMSegment(sentence);
+}
+
+// Forwards to ChineseSegmentationPrivate::callMPSegment().
+vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
+{
+    return d->callMPSegment(sentence);
+}
+
+// Allocates the implementation object; no parent is passed, so its back-pointer stays nullptr.
+ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
+{
+}
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2020, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: zhangzihao <zhangzihao@kylinos.cn>
+ * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
+ *
+ */
+#ifndef CHINESESEGMENTATION_H
+#define CHINESESEGMENTATION_H
+
+#include <QString>
+#include "libchinese-segmentation_global.h"
+#include "common-struct.h"
+
+class ChineseSegmentationPrivate;
+/**
+ * @brief Singleton facade over the Chinese word-segmentation engine.
+ *
+ * Every call is delegated to the ChineseSegmentationPrivate object held in d.
+ */
+class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
+public:
+    // Returns the shared instance.
+    static ChineseSegmentation *getInstance();
+
+    /**
+     * @brief ChineseSegmentation::callSegment
+     * Extract keywords with the extractor: the sentence is first segmented with the Mix method,
+     * then keywords are picked using the IDF dictionary. Only keywords of two or more characters are included.
+     * The QString overload rewrites its argument in place before extraction.
+     *
+     * @param sentence the sentence to extract keywords from
+     * @return vector<KeyWord> container holding the information of the extracted keywords
+     */
+    vector<KeyWord> callSegment(const string &sentence);
+    vector<KeyWord> callSegment(QString &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutStr
+     * Segment with the Mix method: a first pass with the maximum-probability method (MP), refined with the
+     * hidden Markov model (HMM). It cuts both dictionary words and out-of-vocabulary words, with fairly accurate results.
+     *
+     * @param sentence the sentence to segment
+     * @return vector<string> container holding only the text of each resulting word
+     */
+    vector<string> callMixSegmentCutStr(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutWord
+     * Same functionality as callMixSegmentCutStr.
+     * @param str the sentence to segment
+     * @return vector<Word> container holding the full information of each resulting word
+     */
+    vector<Word> callMixSegmentCutWord(const string& str);
+
+    /**
+     * @brief ChineseSegmentation::lookUpTagOfWord
+     * Look up the part of speech of a word.
+     * @param word the word whose part of speech is queried
+     * @return string the part of speech of word
+     */
+    string lookUpTagOfWord(const string& word);
+
+    /**
+     * @brief ChineseSegmentation::getTagOfWordsInSentence
+     * Segment with the Mix method and return the part of speech of every word.
+     * @param sentence the sentence to segment
+     * @return vector<pair<string, string>> text of each word (first) and its part of speech (second)
+     */
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callFullSegment
+     * Segment with the Full method, which cuts out every word found in the dictionary.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each resulting word
+     */
+    vector<Word> callFullSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callQuerySegment
+     * Segment with the Query method: Mix first, then Full on long words. The result is the most precise,
+     * but also contains the largest number of words.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each resulting word
+     */
+    vector<Word> callQuerySegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callHMMSegment
+     * Segment with the hidden Markov model (HMM).
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each resulting word
+     */
+    vector<Word> callHMMSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMPSegment
+     * Segment with the maximum-probability method (MP).
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each resulting word
+     */
+    vector<Word> callMPSegment(const string& sentence);
+
+private:
+    // Construction is private (use getInstance()); copying is disabled.
+    explicit ChineseSegmentation();
+    ~ChineseSegmentation() = default;
+    ChineseSegmentation(const ChineseSegmentation&) = delete;
+    ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
+
+private:
+    // Implementation object.
+    ChineseSegmentationPrivate *d = nullptr;
+};
+
+#endif // CHINESESEGMENTATION_H
--- a/libchinese-segmentation/common-struct.h
+++ b/libchinese-segmentation/common-struct.h
@ -0,0 +1,52 @@
+#ifndef COMMONSTRUCT_H
+#define COMMONSTRUCT_H
+
+#include <string>
+#include <vector>
+
+using namespace std;
+
+/**
+ * @brief The KeyWord struct
+ *
+ * @property word the content of keyword
+ * @property offsets the Unicode offsets, can be used to check the word pos in a sentence
+ * @property weight the weight of the keyword
+ */
+
+struct KeyWord {
+    string word;             // text of the keyword
+    vector<size_t> offsets;  // Unicode offsets of the keyword's occurrences
+    double weight;           // keyword weight; value-initialise (KeyWord()) to start from 0
+    // No user-declared destructor on purpose: the members release their own
+    // storage, and declaring one would suppress the implicit move operations,
+    // turning every move of a KeyWord into a deep copy.
+};
+
+/**
+ * @brief The Word struct
+ *
+ * @property word the content of word
+ * @property offset the offset of the word(absolute pos, Chinese 3 , English 1)， can be used to check the word pos in a sentence
+ * @property unicode_offset the Unicode offset of the word
+ * @property unicode_length the Unicode length of the word
+ */
+struct Word {
+    string word;                  // text of the word
+    uint32_t offset = 0;          // absolute position of the word in the sentence
+    uint32_t unicode_offset = 0;  // Unicode offset of the word
+    uint32_t unicode_length = 0;  // Unicode length of the word
+
+    // Leaves the Unicode fields at their defaults of 0 instead of indeterminate values.
+    Word(const string& w, uint32_t o)
+        : word(w), offset(o) {
+    }
+    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
+        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+    }
+    // No user-declared destructor on purpose: declaring one would suppress the
+    // implicit move operations, so vector<Word> would copy instead of move.
+}; // struct Word
+
+#endif // COMMONSTRUCT_H
--- a/libchinese-segmentation/cppjieba/DatTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DatTrie.hpp
@ -0,0 +1,641 @@
+#pragma once
+
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <QDebug>
+
+#include <algorithm>
+#include <utility>
+
+#include "limonp/Md5.hpp"
+#include "Unicode.hpp"
+//#define USE_DARTS_CLONE
+#ifdef USE_DARTS_CLONE
+#include "../storage-base/darts-clone/darts.h"
+#else
+#include "../storage-base/cedar/cedar.h"
+#endif
+
+namespace cppjieba {
+
+using std::pair;
+
+// Dictionary entry used while building the double-array cache.
+struct DatElement {
+    string word;
+    string tag;
+    double weight = 0;
+
+    // Ascending by word; entries with the same word are ordered by descending weight.
+    bool operator < (const DatElement & b) const {
+        if (word != b.word) {
+            return this->word < b.word;
+        }
+
+        return this->weight > b.weight;
+    }
+};
+
+// Word together with its IDF value.
+struct IdfElement {
+    string word;
+    double idf = 0;
+
+    // Ascending by word; entries with the same word are ordered by descending idf.
+    bool operator < (const IdfElement & b) const {
+        if (word != b.word) {
+            return this->word < b.word;
+        }
+
+        return this->idf > b.idf;
+    }
+};
+
+// Word together with its pinyin tag.
+struct PinYinElement
+{
+    string word;
+    string tag;
+
+    // Orders entries by word, so that two PinYinElement objects can be compared.
+    bool operator < (const PinYinElement & b) const {
+        return this->word < b.word;
+    }
+
+    // Original overload, kept so existing comparisons against a DatElement still compile.
+    bool operator < (const DatElement & b) const {
+        return this->word < b.word;
+    }
+};
+
+// Streams a DatElement as "word=<word>/tag=<tag>/weight=<weight>".
+inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
+    return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
+}
+
+// Fixed-size record holding a NUL-terminated tag of at most 5 bytes.
+struct PinYinMemElem {
+    char tag[6] = {};
+
+    // Stores str, truncated to sizeof(tag) - 1 bytes; the remainder of the buffer is zero.
+    void SetTag(const string & str) {
+        const size_t copyLen = std::min(str.size(), sizeof(tag) - 1);
+        memset(tag, 0, sizeof(tag));
+        strncpy(tag, str.c_str(), copyLen);
+    }
+
+    // Returns the stored tag as a string.
+    string GetTag() const {
+        return string(tag);
+    }
+};
+
+// Streams a DatMemElem as "/tag=<tag>/weight=<weight>".
+inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
+    return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
+}
+// Double-array trie backend: darts-clone when USE_DARTS_CLONE is defined, cedar otherwise.
+#ifdef USE_DARTS_CLONE
+typedef Darts::DoubleArray JiebaDAT;
+#else
+typedef cedar::da<int, -1, -2, false> JiebaDAT;
+#endif
+
+
+// Header at the start of a DAT cache file; InitAttachDat() reads it straight from the mapping.
+struct CacheFileHeader {
+    char md5_hex[32] = {};      // compared with the expected md5 when the cache is attached
+    double min_weight = 0;      // copied into DatTrie::min_weight_
+    uint32_t elements_num = 0;  // number of DatMemElem records following the header
+    uint32_t dat_size = 0;      // number of double-array units following the records
+};
+
+// The cache file layout depends on these sizes: DatMemElem must be 16 bytes and
+// the header size must be a multiple of it.
+static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
+static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
+
+
+class DatTrie {
+public:
+    // Creates an empty trie; a cache is attached later through the Init* methods.
+    DatTrie() {}
+    // Unmaps the cache mapping and closes its file descriptor, then resets the bookkeeping fields.
+    ~DatTrie() {
+        ::munmap(mmap_addr_, mmap_length_);
+        mmap_addr_ = nullptr;
+        mmap_length_ = 0;
+
+        ::close(mmap_fd_);
+        mmap_fd_ = -1;
+    }
+
+    /**
+     * @brief Exact-match lookup of @p key in the attached dictionary.
+     * @param key Word to look up.
+     * @return Pointer to the element stored for @p key, or nullptr when the key
+     *         is absent or the stored index does not fit the element table.
+     */
+    const DatMemElem * Find(const string & key) const {
+#ifdef USE_DARTS_CLONE
+        JiebaDAT::result_pair_type find_result;
+        dat_.exactMatchSearch(key.c_str(), find_result);
+
+        if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
+            return nullptr;
+        }
+
+        return &elements_ptr_[ find_result.value ];
+#else
+        const int result = dat_.exactMatchSearch<int>(key.c_str());
+        // Reject misses (negative sentinels) and, as the prefix searches in this
+        // class do, any index outside the attached element table, so a damaged
+        // cache file cannot make us read past the mapping.
+        if ((result < 0) || ((size_t)result >= elements_num_))
+            return nullptr;
+        return &elements_ptr_[result];
+#endif
+    }
+
+    /**
+     * @brief Exact-match lookup in the IDF table.
+     * @param key      Text to look up.
+     * @param length   Length argument forwarded to exactMatchSearch().
+     * @param node_pos Start node argument forwarded to exactMatchSearch().
+     * @return The value stored for the key, or -1 when there is no match.
+     */
+    const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
+#ifdef USE_DARTS_CLONE
+        JiebaDAT::result_pair_type match;
+        dat_.exactMatchSearch(key.c_str(), match, length, node_pos);
+
+        const bool hit = (0 != match.length) && (match.value >= 0) && ((size_t)match.value < elements_num_);
+        if (!hit) {
+            return -1;
+        }
+
+        return idf_elements_ptr_[ match.value ];
+#else
+        const int index = dat_.exactMatchSearch<int>(key.c_str(), length, node_pos);
+        if (index >= 0) {
+            return idf_elements_ptr_[index];
+        }
+        return -1;
+#endif
+    }
+
+    /**
+     * @brief Exact-match lookup in the pinyin table.
+     * @param key Text to look up.
+     * @return Pointer to the record stored for @p key, or nullptr when there is no match.
+     */
+    const PinYinMemElem * PinYinFind(const string & key) const {
+#ifdef USE_DARTS_CLONE
+        JiebaDAT::result_pair_type match;
+        dat_.exactMatchSearch(key.c_str(), match);
+
+        const bool hit = (0 != match.length) && (match.value >= 0) && ((size_t)match.value < elements_num_);
+        if (!hit) {
+            return nullptr;
+        }
+
+        return &pinyin_elements_ptr_[ match.value ];
+#else
+        const int index = dat_.exactMatchSearch<int>(key.c_str());
+        if (index >= 0) {
+            return &pinyin_elements_ptr_[index];
+        }
+        return nullptr;
+#endif
+    }
+
+    /**
+     * @brief Build the word graph for the runes in [begin, end).
+     *
+     * For every rune i, res[i].nexts receives one edge per dictionary word that
+     * starts at i: (index of the rune after the word, element of the word).
+     * nexts[0] is always the single-rune edge (i + 1, ...); its element stays
+     * nullptr when the rune alone is not in the dictionary.
+     *
+     * @param begin        First rune of the text.
+     * @param end          One past the last rune of the text.
+     * @param res          Output graph; cleared and resized to the number of runes.
+     * @param max_word_len Words longer than this many runes are ignored.
+     */
+    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<struct DatDag>&res, size_t max_word_len) const {
+
+        res.clear();
+        res.resize(end - begin);
+
+        string text_str;
+        EncodeRunesToString(begin, end, text_str);
+
+        static const size_t max_num = 128;
+        JiebaDAT::result_pair_type result_pairs[max_num] = {};
+
+        for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
+
+            // commonPrefixSearch reports the total number of matches, which can be
+            // larger than the buffer; only the first max_num entries were written.
+            const std::size_t num_results = std::min<std::size_t>(
+                dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num), max_num);
+
+            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
+
+            for (std::size_t idx = 0; idx < num_results; ++idx) {
+                auto & match = result_pairs[idx];
+
+                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
+                    continue;
+                }
+
+                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+
+                if (char_num > max_word_len) {
+                    continue;
+                }
+
+                auto pValue = &elements_ptr_[match.value];
+
+                if (1 == char_num) {
+                    // Single-rune word: fill in the edge that was pushed above.
+                    res[i].nexts[0].second = pValue;
+                    continue;
+                }
+
+                res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
+            }
+
+            // Advance the byte position by the UTF-8 size of the rune just handled.
+            begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
+        }
+    }
+
+    /*
+    void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<struct DatDag>&res, size_t max_word_len) const {
+
+        res.clear();
+        res.resize(end - begin);
+
+        string text_str;
+        EncodeRunesToString(begin, end, text_str);
+
+        static const size_t max_num = 128;
+        JiebaDAT::result_pair_type result_pairs[max_num] = {};
+
+        size_t str_size = end - begin;
+        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
+
+            begin_pos -= (end - i - 1)->len;
+            std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
+            res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - i, nullptr));
+
+            for (std::size_t idx = 0; idx < num_results; ++idx) {
+                auto & match = result_pairs[idx];
+                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
+                    continue;
+                }
+
+                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+
+                if (char_num > max_word_len) {
+                    continue;
+                }
+
+                auto pValue = &elements_ptr_[match.value];
+
+                if (1 == char_num) {
+                    res[str_size - i - 1].nexts[0].second = pValue;
+                    continue;
+                }
+
+                res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - 1 - i + char_num, pValue));
+            }
+        }
+    }*/
+
+    /**
+     * @brief Split [begin, end) into the word sequence with the largest total weight.
+     *
+     * The runes are processed from last to first. For each start rune, every
+     * dictionary word beginning there is tried and the one giving the best
+     * total weight for the remaining text is remembered. A rune with no usable
+     * dictionary word becomes a word of its own, weighted with min_weight_.
+     * The chosen words are appended to @p words in text order.
+     *
+     * @param begin        First rune of the text.
+     * @param end          One past the last rune of the text.
+     * @param words        Output; the resulting ranges are appended.
+     * @param max_word_len Words longer than this many runes are ignored.
+     */
+    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<WordRange>& words, size_t max_word_len) const {
+
+        const size_t str_size = end - begin;
+        if (0 == str_size) {
+            return;
+        }
+
+        string text_str;
+        EncodeRunesToString(begin, end, text_str);
+
+        static const size_t max_num = 128;
+        JiebaDAT::result_pair_type result_pairs[max_num] = {}; // dictionary matches for the current start rune
+        // Heap-backed tables: their size follows the input length, so they must
+        // not live on the stack as variable-length arrays.
+        vector<double> max_weight(str_size, -3.14e+100); // best total weight from each rune to the end
+        vector<size_t> max_next(str_size, 0);            // index of the rune following the best word at each rune
+
+        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
+            const size_t cur = str_size - i - 1; // walk backwards through the runes
+            begin_pos -= (begin + cur)->len;
+
+            // commonPrefixSearch reports the total number of matches, which can be
+            // larger than the buffer; only the first max_num entries were written.
+            const std::size_t num_results = std::min<std::size_t>(
+                dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num), max_num);
+
+            bool updated = false;
+            for (std::size_t idx = 0; idx < num_results; ++idx) {
+                const auto & match = result_pairs[idx];
+                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
+                    continue;
+                }
+                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+                if ((0 == char_num) || (char_num > max_word_len)) {
+                    continue;
+                }
+                const size_t next = cur + char_num;
+                if (next > str_size) {
+                    continue;
+                }
+
+                double val = elements_ptr_[match.value].weight;
+                if (next < str_size) {
+                    val += max_weight[next];
+                }
+                if (val > max_weight[cur]) {
+                    max_weight[cur] = val;
+                    max_next[cur] = next;
+                    updated = true;
+                }
+            }
+
+            if (!updated) {
+                // No usable dictionary word starts here (nothing matched, or every
+                // match was filtered out): emit the rune on its own so that
+                // max_next is always defined for the walk below.
+                double val = min_weight_;
+                if (cur + 1 < str_size) {
+                    val += max_weight[cur + 1];
+                }
+                max_weight[cur] = val;
+                max_next[cur] = cur + 1;
+            }
+        }
+
+        // Follow the chosen path from the first rune and collect the words.
+        for (size_t i = 0; i < str_size; i = max_next[i]) {
+            assert(max_next[i] > i);
+            assert(max_next[i] <= str_size);
+            words.push_back(WordRange(begin + i, begin + max_next[i] - 1));
+        }
+    }
+    // Returns the weight applied to runes that have no dictionary word.
+    double GetMinWeight() const {
+        return min_weight_;
+    }
+
+    // Stores d in min_weight_; the BuildDatCache overloads copy this value
+    // into the cache file header.
+    void SetMinWeight(double d) {
+        min_weight_ = d ;
+    }
+
+    // Builds the cache file for word-dictionary elements via BuildDatCache and
+    // then attaches to it. Returns the result of InitAttachDat.
+    bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
+        BuildDatCache(elements, dat_cache_file, md5);
+        return InitAttachDat(dat_cache_file, md5);
+    }
+
+    // Same as above for IDF elements; returns the result of InitIdfAttachDat.
+    bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
+        BuildDatCache(elements, dat_cache_file, md5);
+        return InitIdfAttachDat(dat_cache_file, md5);
+    }
+
+    // Same as above for pinyin elements; returns the result of InitPinYinAttachDat.
+    bool InitBuildDat(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
+        BuildDatCache(elements, dat_cache_file, md5);
+        return InitPinYinAttachDat(dat_cache_file, md5);
+    }
+
+    /**
+     * Attaches this trie to a word-dictionary cache file whose payload is an
+     * array of DatMemElem records followed by the double-array units.
+     *
+     * @param dat_cache_file path of the cache file to map read-only
+     * @param md5            digest expected in the cache header
+     * @return true when the file was mapped and validated; false when it cannot
+     *         be opened or mapped, is too small, carries a different digest, or
+     *         its size does not match the layout announced by its header.
+     */
+    bool InitAttachDat(const string & dat_cache_file, const string & md5) {
+        return AttachCacheFile(dat_cache_file, md5, elements_ptr_);
+    }
+
+    /**
+     * Same as InitAttachDat, for an IDF cache file whose payload is an array of
+     * double values (exposed through idf_elements_ptr_).
+     */
+    bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
+        return AttachCacheFile(dat_cache_file, md5, idf_elements_ptr_);
+    }
+
+    /**
+     * Same as InitAttachDat, for a pinyin cache file whose payload is an array
+     * of PinYinMemElem records (exposed through pinyin_elements_ptr_).
+     */
+    bool InitPinYinAttachDat(const string & dat_cache_file, const string & md5) {
+        return AttachCacheFile(dat_cache_file, md5, pinyin_elements_ptr_);
+    }
+
+private:
+    /**
+     * Shared implementation of the three Init*AttachDat entry points.
+     *
+     * Maps dat_cache_file read-only, validates its header against md5 and the
+     * file size, and on success publishes the mapping through mmap_fd_,
+     * mmap_length_, mmap_addr_, elements_out and dat_.
+     *
+     * Every failure path releases the descriptor and the mapping created by
+     * this call, so a stale or corrupt cache no longer leaks them when the
+     * caller falls back to rebuilding the cache and attaching again.
+     *
+     * @tparam MemElemT     record type stored between the header and the double-array
+     * @param elements_out  member pointer that receives the address of the record array
+     */
+    template <typename MemElemT>
+    bool AttachCacheFile(const string & dat_cache_file, const string & md5, const MemElemT *& elements_out) {
+        const int fd = ::open(dat_cache_file.c_str(), O_RDONLY);
+
+        if (fd < 0) {
+            return false;
+        }
+
+        // A file shorter than the header cannot be a valid cache; reject it
+        // before mapping (mmap also refuses a zero length).
+        const auto seek_off = ::lseek(fd, 0, SEEK_END);
+
+        if ((seek_off < 0) || (static_cast<size_t>(seek_off) < sizeof(CacheFileHeader))) {
+            ::close(fd);
+            return false;
+        }
+
+        const size_t length = static_cast<size_t>(seek_off);
+        void * const mapped = ::mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
+
+        if (MAP_FAILED == mapped) {
+            ::close(fd);
+            return false;
+        }
+
+        char * const addr = reinterpret_cast<char *>(mapped);
+        const CacheFileHeader & header = *reinterpret_cast<const CacheFileHeader *>(addr);
+        // As before, these two members are refreshed from the header even if
+        // the validation below rejects the file.
+        elements_num_ = header.elements_num;
+        min_weight_ = header.min_weight;
+
+        // The digest must match exactly; a length mismatch is treated as a
+        // mismatch instead of letting memcmp read past the header field.
+        const bool digest_ok = (sizeof(header.md5_hex) == md5.size())
+                               && (0 == memcmp(&header.md5_hex[0], md5.c_str(), md5.size()));
+        // The file must be exactly header + records + double-array units,
+        // otherwise the pointers computed below would run outside the mapping.
+        const bool layout_ok = digest_ok
+                               && (length == sizeof(header) + header.elements_num * sizeof(MemElemT) + header.dat_size * dat_.unit_size());
+
+        if (!layout_ok) {
+            ::munmap(mapped, length);
+            ::close(fd);
+            return false;
+        }
+
+        mmap_fd_ = fd;
+        mmap_length_ = length;
+        mmap_addr_ = addr;
+        elements_out = reinterpret_cast<const MemElemT *>(mmap_addr_ + sizeof(header));
+        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(MemElemT) * elements_num_;
+        dat_.set_array(dat_ptr, header.dat_size);
+        return true;
+    }
+
+private:
+    /**
+     * Writes exactly len bytes from data to fd, retrying after short writes and
+     * EINTR. Returns false on any other write error or if no progress is made.
+     */
+    static bool WriteFully(int fd, const void * data, size_t len) {
+        const char * cursor = static_cast<const char *>(data);
+
+        while (len > 0) {
+            const auto written = ::write(fd, cursor, len);
+
+            if (written < 0) {
+                if (EINTR == errno) {
+                    continue;
+                }
+                return false;
+            }
+
+            if (0 == written) {
+                return false;
+            }
+
+            cursor += written;
+            len -= static_cast<size_t>(written);
+        }
+
+        return true;
+    }
+
+    /**
+     * Shared back end of the BuildDatCache overloads.
+     *
+     * Builds dat_ from keys/values, then writes [header][mem_elems][double-array]
+     * to a temporary file next to dat_cache_file and renames it into place.
+     * The function cannot report failure (its callers re-attach to the file and
+     * check that result), so on error it stops early and removes the temporary
+     * file instead of leaving a partial cache behind.
+     *
+     * @tparam MemElemT record type written between the header and the double-array
+     */
+    template <typename MemElemT>
+    void StoreDatCache(vector<const char*>& keys, vector<int>& values, const vector<MemElemT>& mem_elems,
+                       const string & dat_cache_file, const string & md5) {
+        CacheFileHeader header;
+        header.min_weight = min_weight_;
+        assert(sizeof(header.md5_hex) == md5.size());
+
+        if (sizeof(header.md5_hex) != md5.size()) {
+            return; // copying would overflow or under-fill the header field
+        }
+
+        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+        // data() instead of &v[0]: well defined even when the vectors are empty.
+        auto const ret = dat_.build(keys.size(), keys.data(), NULL, values.data());
+        assert(0 == ret);
+
+        if (0 != ret) {
+            return;
+        }
+
+        header.elements_num = mem_elems.size();
+        header.dat_size = dat_.size();
+
+        string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+        // umask is process-wide state: remember the previous mask and restore it
+        // right after the temporary file has been created.
+        const auto previous_mask = ::umask(S_IWGRP | S_IWOTH);
+        const int fd = ::mkstemp(&tmp_filepath[0]);
+        qDebug() << "mkstemp :" << errno << tmp_filepath.data();
+        ::umask(previous_mask);
+        assert(fd >= 0);
+
+        if (fd < 0) {
+            return;
+        }
+
+        ::fchmod(fd, 0644);
+
+        const bool written = WriteFully(fd, &header, sizeof(header))
+                             && WriteFully(fd, mem_elems.data(), sizeof(MemElemT) * mem_elems.size())
+                             && WriteFully(fd, dat_.array(), dat_.total_size());
+        ::close(fd);
+        assert(written);
+
+        if (!written) {
+            ::unlink(tmp_filepath.c_str());
+            return;
+        }
+
+        const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
+        assert(0 == rename_ret);
+
+        if (0 != rename_ret) {
+            ::unlink(tmp_filepath.c_str());
+        }
+    }
+
+    /**
+     * Sorts the word-dictionary elements, builds the double-array over their
+     * words and writes the cache file (header, DatMemElem records, double-array).
+     * The value stored for each key is its index in the sorted elements.
+     */
+    void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
+        std::sort(elements.begin(), elements.end());
+
+        vector<const char*> keys_ptr_vec;
+        vector<int> values_vec;
+        vector<DatMemElem> mem_elem_vec;
+
+        keys_ptr_vec.reserve(elements.size());
+        values_vec.reserve(elements.size());
+        mem_elem_vec.reserve(elements.size());
+
+        for (size_t i = 0; i < elements.size(); ++i) {
+            keys_ptr_vec.push_back(elements[i].word.data());
+            values_vec.push_back(i);
+            mem_elem_vec.push_back(DatMemElem());
+            auto & mem_elem = mem_elem_vec.back();
+            mem_elem.weight = elements[i].weight;
+            mem_elem.SetTag(elements[i].tag);
+        }
+
+        StoreDatCache(keys_ptr_vec, values_vec, mem_elem_vec, dat_cache_file, md5);
+    }
+
+    /**
+     * Sorts the IDF elements, builds the double-array over their words and
+     * writes the cache file (header, one double per element, double-array).
+     */
+    void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
+        std::sort(elements.begin(), elements.end());
+
+        vector<const char*> keys_ptr_vec;
+        vector<int> values_vec;
+        vector<double> mem_elem_vec;
+
+        keys_ptr_vec.reserve(elements.size());
+        values_vec.reserve(elements.size());
+        mem_elem_vec.reserve(elements.size());
+
+        for (size_t i = 0; i < elements.size(); ++i) {
+            keys_ptr_vec.push_back(elements[i].word.data());
+            values_vec.push_back(i);
+            mem_elem_vec.push_back(elements[i].idf);
+        }
+
+        StoreDatCache(keys_ptr_vec, values_vec, mem_elem_vec, dat_cache_file, md5);
+    }
+
+    /**
+     * Builds the double-array over the pinyin elements and writes the cache
+     * file (header, PinYinMemElem records, double-array).
+     * Unlike the other overloads the elements are used in the order given;
+     * presumably the caller supplies them already sorted -- TODO confirm.
+     */
+    void BuildDatCache(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
+        //std::sort(elements.begin(), elements.end());
+
+        vector<const char*> keys_ptr_vec;
+        vector<int> values_vec;
+        vector<PinYinMemElem> mem_elem_vec;
+
+        keys_ptr_vec.reserve(elements.size());
+        values_vec.reserve(elements.size());
+        mem_elem_vec.reserve(elements.size());
+
+        for (size_t i = 0; i < elements.size(); ++i) {
+            keys_ptr_vec.push_back(elements[i].word.data());
+            values_vec.push_back(i);
+            mem_elem_vec.push_back(PinYinMemElem());
+            auto & mem_elem = mem_elem_vec.back();
+            mem_elem.SetTag(elements[i].tag);
+        }
+
+        StoreDatCache(keys_ptr_vec, values_vec, mem_elem_vec, dat_cache_file, md5);
+    }
+
+    // Copy construction and copy assignment are declared in the private
+    // section without a body here.
+    DatTrie(const DatTrie &);
+    DatTrie &operator=(const DatTrie &);
+
+private:
+    JiebaDAT dat_;                                        // double-array; its array is pointed into the mapping by the attach methods
+    const DatMemElem * elements_ptr_ = nullptr;           // record array set by InitAttachDat
+    const double * idf_elements_ptr_ = nullptr;           // record array set by InitIdfAttachDat
+    const PinYinMemElem * pinyin_elements_ptr_ = nullptr; // record array set by InitPinYinAttachDat
+    size_t elements_num_ = 0;                             // element count read from the cache header
+    double min_weight_ = 0;                               // read from / written to the cache header
+
+    int mmap_fd_ = -1;                                    // descriptor of the opened cache file
+    size_t mmap_length_ = 0;                              // size of the cache file mapping in bytes
+    char * mmap_addr_ = nullptr;                          // start address of the cache file mapping
+};
+
+
+/**
+ * Computes one MD5 digest over the contents of several files.
+ *
+ * @param files_list     file paths separated by '|' or ';'
+ * @param file_size_sum  receives the total number of bytes that were hashed
+ * @return the digest as produced in md5.digestChars
+ *
+ * Entries that cannot be opened, are empty, or cannot be memory-mapped (for
+ * example a directory) are skipped. Previously a failed mmap was only caught
+ * by an assert, so a release build passed MAP_FAILED to the hash update.
+ */
+inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
+    limonp::MD5 md5;
+
+    const auto files = limonp::Split(files_list, "|;");
+    file_size_sum = 0;
+
+    for (auto const & local_path : files) {
+        const int fd = ::open(local_path.c_str(), O_RDONLY);
+        if (fd < 0) {
+            continue;
+        }
+        // lseek reports the file size; a negative (error) or zero result means
+        // there is nothing to hash for this entry.
+        auto const len = ::lseek(fd, 0, SEEK_END);
+        if (len > 0) {
+            void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+
+            if (MAP_FAILED != addr) {
+                md5.Update((unsigned char *) addr, len);
+                file_size_sum += len;
+
+                ::munmap(addr, len);
+            }
+        }
+        ::close(fd);
+    }
+
+    md5.Final();
+    return string(md5.digestChars);
+}
+
+}
--- a/libchinese-segmentation/cppjieba/DictTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DictTrie.hpp
@ -0,0 +1,234 @@
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <cmath>
+#include <limits>
+#include "limonp/StringUtil.hpp"
+#include "limonp/Logging.hpp"
+#include "Unicode.hpp"
+#include "DatTrie.hpp"
+#include <QDebug>
+namespace cppjieba {
+
+using namespace limonp;
+
+const double MAX_DOUBLE = 3.14e+100;
+const size_t DICT_COLUMN_NUM = 3;
+const char* const UNKNOWN_TAG = "";
+
+/**
+ * Word dictionary backed by a DatTrie.
+ *
+ * On construction the dictionary either attaches to an existing cache file
+ * whose digest matches the dictionary files, or loads the text dictionaries,
+ * computes the word weights and builds a new cache file.
+ */
+class DictTrie {
+public:
+    // Selects which static-dictionary weight is used as the default weight of
+    // user words that carry no frequency.
+    enum UserWordWeightOption {
+        WordWeightMin,
+        WordWeightMedian,
+        WordWeightMax,
+    }; // enum UserWordWeightOption
+
+    /**
+     * @param dict_path             path of the main dictionary file
+     * @param user_dict_paths       user dictionary paths separated by '|' or ';' (may be empty)
+     * @param dat_cache_path        cache file location; empty selects a default under /tmp
+     * @param user_word_weight_opt  default weight policy for user words
+     */
+    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
+             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
+        Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
+    }
+
+    ~DictTrie() {}
+
+    // Forwards the lookup of a single word to the underlying DatTrie.
+    const DatMemElem* Find(const string & word) const {
+        return dat_.Find(word);
+    }
+
+    // Forwards to the DatTrie overload that fills a vector of DatDag nodes.
+    void FindDatDag(RuneStrArray::const_iterator begin,
+              RuneStrArray::const_iterator end,
+              vector<struct DatDag>&res,
+              size_t max_word_len = MAX_WORD_LENGTH) const {
+        dat_.Find(begin, end, res, max_word_len);
+    }
+
+    // Forwards to the DatTrie overload that fills a vector of WordRange.
+    void FindWordRange(RuneStrArray::const_iterator begin,
+              RuneStrArray::const_iterator end,
+              vector<WordRange>& words,
+              size_t max_word_len = MAX_WORD_LENGTH) const {
+        dat_.Find(begin, end, words, max_word_len);
+    }
+
+    // True when word was registered as a single-character user dictionary entry.
+    bool IsUserDictSingleChineseWord(const Rune& word) const {
+        return IsIn(user_dict_single_chinese_word_, word);
+    }
+
+    double GetMinWeight() const {
+        return dat_.GetMinWeight();
+    }
+
+    // Total number of bytes of dictionary files that were hashed in Init.
+    size_t GetTotalDictSize() const {
+        return total_dict_size_;
+    }
+
+    /**
+     * Parses one user dictionary line ("word", "word tag" or "word freq tag",
+     * separated by spaces).
+     *
+     * @param line          the line to parse; a line without fields is ignored
+     * @param saveNodeInfo  when true the parsed entry is appended to
+     *                      static_node_infos_; single-character words are
+     *                      recorded in user_dict_single_chinese_word_ either way
+     */
+    void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
+        vector<string> buf;
+        DatElement node_info;
+        Split(line, buf, " ");
+
+        if (buf.size() == 0) {
+            return;
+        }
+
+        node_info.word = buf[0];
+        node_info.weight = user_word_default_weight_;
+        node_info.tag = UNKNOWN_TAG;
+
+        if (buf.size() == 2) {
+            node_info.tag = buf[1];
+        } else if (buf.size() == 3) {
+            // The frequency can only be converted to a weight once the
+            // frequency sum of the static dictionary is known.
+            if (freq_sum_ > 0.0) {
+                const int freq = atoi(buf[1].c_str());
+                node_info.weight = log(1.0 * freq / freq_sum_);
+                node_info.tag = buf[2];
+            }
+        }
+
+        if (saveNodeInfo) {
+            static_node_infos_.push_back(node_info);
+        }
+
+        if (Utf8CharNum(node_info.word) == 1) {
+            RuneArray word;
+
+            if (DecodeRunesInString(node_info.word, word)) {
+                user_dict_single_chinese_word_.insert(word[0]);
+            } else {
+                XLOG(ERROR) << "Decode " << node_info.word << " failed.";
+            }
+        }
+    }
+
+    /**
+     * Reads every file named in filePaths (separated by '|' or ';') and feeds
+     * each non-empty line to InserUserDictNode.
+     */
+    void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
+        vector<string> files = limonp::Split(filePaths, "|;");
+
+        for (size_t i = 0; i < files.size(); i++) {
+            ifstream ifs(files[i].c_str());
+            XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
+            string line;
+
+            for (; getline(ifs, line);) {
+                if (line.size() == 0) {
+                    continue;
+                }
+
+                InserUserDictNode(line, saveNodeInfo);
+            }
+        }
+    }
+
+
+private:
+    /**
+     * Attaches to a matching cache file if one exists; otherwise loads the
+     * dictionaries, computes the weights and builds the cache file.
+     */
+    void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
+              UserWordWeightOption user_word_weight_opt) {
+        const auto dict_list = dict_path + "|" + user_dict_paths;
+        size_t file_size_sum = 0;
+        const string md5 = CalcFileListMD5(dict_list, file_size_sum);
+        total_dict_size_ = file_size_sum;
+
+        if (dat_cache_path.empty()) {
+            // No cache location given: default to a file in the tmp directory.
+            dat_cache_path = "/tmp/" + md5 + ".dat_";
+        }
+         dat_cache_path += VERSION;
+        QString path = QString::fromStdString(dat_cache_path);
+        qDebug() << "#########Dict path:" << path;
+        if (dat_.InitAttachDat(dat_cache_path, md5)) {
+            // Cache hit: the user dictionaries are re-read only to refill
+            // user_dict_single_chinese_word_; freq_sum_ keeps its initial 0 here.
+            LoadUserDict(user_dict_paths, false);
+            return;
+        }
+
+        LoadDefaultDict(dict_path);
+        freq_sum_ = CalcFreqSum(static_node_infos_);
+        CalculateWeight(static_node_infos_, freq_sum_);
+        double min_weight = 0;
+        SetStaticWordWeights(user_word_weight_opt, min_weight);
+        dat_.SetMinWeight(min_weight);
+
+        LoadUserDict(user_dict_paths);
+        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
+        assert(build_ret);
+        // Release the memory held by the temporary element list.
+        vector<DatElement>().swap(static_node_infos_);
+    }
+
+    /**
+     * Loads the main dictionary: every line must have DICT_COLUMN_NUM
+     * space-separated columns (word, frequency, tag).
+     */
+    void LoadDefaultDict(const string& filePath) {
+        ifstream ifs(filePath.c_str());
+        XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
+        string line;
+        vector<string> buf;
+
+        for (; getline(ifs, line);) {
+            Split(line, buf, " ");
+            XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
+            DatElement node_info;
+            node_info.word = buf[0];
+            node_info.weight = atof(buf[1].c_str());
+            node_info.tag = buf[2];
+            static_node_infos_.push_back(node_info);
+        }
+    }
+
+    /**
+     * Determines the minimum, median and maximum weight of the static
+     * dictionary, reports the minimum through min_weight and selects
+     * user_word_default_weight_ according to option.
+     *
+     * Only the weights are copied and sorted; the previous version copied
+     * every DatElement (strings included) to obtain the same three values.
+     */
+    void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
+        XCHECK(!static_node_infos_.empty());
+        if (static_node_infos_.empty()) {
+            return;
+        }
+
+        vector<double> weights;
+        weights.reserve(static_node_infos_.size());
+
+        for (size_t i = 0; i < static_node_infos_.size(); i++) {
+            weights.push_back(static_node_infos_[i].weight);
+        }
+
+        std::sort(weights.begin(), weights.end());
+        min_weight = weights.front();
+        const double max_weight_ = weights.back();
+        const double median_weight_ = weights[weights.size() / 2];
+
+        switch (option) {
+            case WordWeightMin:
+                user_word_default_weight_ = min_weight;
+                break;
+
+            case WordWeightMedian:
+                user_word_default_weight_ = median_weight_;
+                break;
+
+            default:
+                user_word_default_weight_ = max_weight_;
+                break;
+        }
+    }
+
+    // Sums the (still raw) frequencies stored in the weight field.
+    double CalcFreqSum(const vector<DatElement>& node_infos) const {
+        double sum = 0.0;
+
+        for (size_t i = 0; i < node_infos.size(); i++) {
+            sum += node_infos[i].weight;
+        }
+
+        return sum;
+    }
+
+    // Replaces each raw frequency by log(frequency / sum).
+    void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
+        for (size_t i = 0; i < node_infos.size(); i++) {
+            DatElement& node_info = node_infos[i];
+            assert(node_info.weight > 0.0);
+            node_info.weight = log(double(node_info.weight) / sum);
+        }
+    }
+
+private:
+    vector<DatElement> static_node_infos_;
+    size_t total_dict_size_ = 0;
+    DatTrie dat_;
+
+    // Both values are read by InserUserDictNode, which Init also calls on the
+    // cache-hit path before anything assigns them, so they need defined
+    // initial values.
+    double freq_sum_ = 0.0;
+    double user_word_default_weight_ = 0.0;
+    unordered_set<Rune> user_dict_single_chinese_word_;
+};
+}
+
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@ -0,0 +1,67 @@
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
+#include "SegmentBase.hpp"
+#include "Unicode.hpp"
+
+namespace cppjieba {
+/**
+ * Segmenter that emits every dictionary word found in the input, plus single
+ * characters that no earlier emitted word covers.
+ */
+class FullSegment: public SegmentBase {
+public:
+    // dictTrie must be non-null; it is not owned by this object.
+    FullSegment(const DictTrie* dictTrie)
+        : dictTrie_(dictTrie) {
+        assert(dictTrie_);
+    }
+    ~FullSegment() { }
+
+    /**
+     * Appends to res one WordRange per accepted DAG edge of [begin, end).
+     * An edge is accepted when it is a dictionary word of at least two runes,
+     * or when it is the only edge of its start position and that position is
+     * not yet covered by a previously visited edge.
+     */
+    virtual void Cut(RuneStrArray::const_iterator begin,
+                     RuneStrArray::const_iterator end,
+                     vector<WordRange>& res, bool, size_t) const override {
+        assert(dictTrie_);
+        vector<struct DatDag> dag_nodes;
+        dictTrie_->FindDatDag(begin, end, dag_nodes);
+        size_t covered_until = 0; // one past the furthest rune reached so far
+
+        for (size_t start = 0; start < dag_nodes.size(); ++start) {
+            const auto & edges = dag_nodes[start].nexts;
+            // With exactly one edge the inner loop runs once, so this can be
+            // evaluated up front; with more edges it is false for all of them.
+            const bool lone_uncovered = (edges.size() == 1) && (covered_until <= start);
+
+            for (const auto & edge : edges) {
+                const size_t last = edge.first - 1;
+                assert(last < dag_nodes.size());
+                const bool in_dict = (nullptr != edge.second); // null marks Out-of-Vocabulary
+                const auto rune_count = last - start + 1;
+
+                if (lone_uncovered || (in_dict && (rune_count >= 2))) {
+                    res.push_back(WordRange(begin + start, begin + last));
+                }
+
+                if (covered_until < last + 1) {
+                    covered_until = last + 1;
+                }
+            }
+        }
+    }
+
+    // Not implemented for this segmenter: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t) const override {
+        (void)s;
+        (void)begin;
+        (void)end;
+        (void)res;
+        (void)hmm;
+    }
+    // Not implemented for this segmenter: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        (void)s;
+        (void)begin;
+        (void)end;
+        (void)res;
+        (void)hmm;
+    }
+private:
+    const DictTrie* dictTrie_;
+};
+}
+
--- a/libchinese-segmentation/cppjieba/HMMModel.hpp
+++ b/libchinese-segmentation/cppjieba/HMMModel.hpp
@ -0,0 +1,158 @@
+#pragma once
+
+#include "limonp/StringUtil.hpp"
+//#define USE_CEDAR_SEGMENT //使用cedar初步测试性能损失3%-5%左右，内存占用降低近1M
+#ifdef USE_CEDAR_SEGMENT
+#include "cedar/cedar.h"
+#endif
+namespace cppjieba {
+
+using namespace limonp;
+#ifdef USE_CEDAR_SEGMENT
+typedef cedar::da<float, -1, -2, false> EmitProbMap;
+#else
+typedef unordered_map<Rune, double> EmitProbMap;
+#endif
+/**
+ * Hidden Markov model parameters loaded from a text file: start
+ * probabilities, transition probabilities and one emission table per state.
+ *
+ * NOTE(review): emitProbVec stores the addresses of this object's own
+ * emitProb* members and no copy operations are defined in this struct, so a
+ * copied HMMModel would keep pointing at the tables of the object it was
+ * copied from -- confirm that instances are never copied.
+ */
+struct HMMModel {
+    /*
+     * STATUS:
+     * 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
+     * */
+    enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
+
+    // Zeroes the probability tables, wires up emitProbVec (indexed by state)
+    // and loads the model from modelPath.
+    HMMModel(const string& modelPath) {
+        memset(startProb, 0, sizeof(startProb));
+        memset(transProb, 0, sizeof(transProb));
+        statMap[0] = 'B';
+        statMap[1] = 'E';
+        statMap[2] = 'M';
+        statMap[3] = 'S';
+        emitProbVec.push_back(&emitProbB);
+        emitProbVec.push_back(&emitProbE);
+        emitProbVec.push_back(&emitProbM);
+        emitProbVec.push_back(&emitProbS);
+        LoadModel(modelPath);
+    }
+    ~HMMModel() {
+    }
+    /**
+     * Reads the model file. Expected content lines, in order (blank lines and
+     * lines starting with '#' are skipped by GetLine): one line of start
+     * probabilities, STATUS_SUM lines of transition probabilities, then one
+     * emission line each for B, E, M and S. Any violation trips an XCHECK.
+     */
+    void LoadModel(const string& filePath) {
+        ifstream ifile(filePath.c_str());
+        XCHECK(ifile.is_open()) << "open " << filePath << " failed";
+        string line;
+        vector<string> tmp;
+        //Load startProb
+        XCHECK(GetLine(ifile, line));
+        Split(line, tmp, " ");
+        XCHECK(tmp.size() == STATUS_SUM);
+
+        for (size_t j = 0; j < tmp.size(); j++) {
+            startProb[j] = atof(tmp[j].c_str());
+        }
+
+        //Load transProb
+        for (size_t i = 0; i < STATUS_SUM; i++) {
+            XCHECK(GetLine(ifile, line));
+            Split(line, tmp, " ");
+            XCHECK(tmp.size() == STATUS_SUM);
+
+            for (size_t j = 0; j < tmp.size(); j++) {
+                transProb[i][j] = atof(tmp[j].c_str());
+            }
+        }
+
+        //Load emitProbB
+        XCHECK(GetLine(ifile, line));
+        XCHECK(LoadEmitProb(line, emitProbB));
+
+        //Load emitProbE
+        XCHECK(GetLine(ifile, line));
+        XCHECK(LoadEmitProb(line, emitProbE));
+
+        //Load emitProbM
+        XCHECK(GetLine(ifile, line));
+        XCHECK(LoadEmitProb(line, emitProbM));
+
+        //Load emitProbS
+        XCHECK(GetLine(ifile, line));
+        XCHECK(LoadEmitProb(line, emitProbS));
+    }
+    /**
+     * Looks up the emission probability of key in the table ptMp.
+     * @return the stored value, or defVal when key has no entry (in the cedar
+     *         build: when the lookup result is negative).
+     */
+    double GetEmitProb(const EmitProbMap* ptMp, Rune key,
+                       double defVal)const {
+#ifdef USE_CEDAR_SEGMENT
+        char str_key[8];
+        snprintf(str_key, sizeof(str_key), "%d", key);
+        float result = ptMp->exactMatchSearch<float>(str_key);
+        return result < 0 ? defVal : result;
+#else
+        EmitProbMap::const_iterator cit = ptMp->find(key);
+
+        if (cit == ptMp->end()) {
+            return defVal;
+        }
+
+        return cit->second;
+#endif
+    }
+    /**
+     * Reads the next content line into line (trimmed), skipping empty lines
+     * and lines that start with '#'.
+     * @return false when the stream is exhausted before a content line is found
+     */
+    bool GetLine(ifstream& ifile, string& line) {
+        while (getline(ifile, line)) {
+            Trim(line);
+
+            if (line.empty()) {
+                continue;
+            }
+
+            if (StartsWith(line, "#")) {
+                continue;
+            }
+
+            return true;
+        }
+
+        return false;
+    }
+    /**
+     * Parses one emission line of the form "char:prob,char:prob,..." into mp.
+     * @return false when the line is empty, an item does not split into
+     *         exactly two fields at ':', or the character part does not decode
+     *         to exactly one rune
+     */
+    bool LoadEmitProb(const string& line, EmitProbMap& mp) {
+        if (line.empty()) {
+            return false;
+        }
+
+        vector<string> tmp, tmp2;
+        RuneArray unicode;
+        Split(line, tmp, ",");
+
+        for (size_t i = 0; i < tmp.size(); i++) {
+            Split(tmp[i], tmp2, ":");
+
+            if (2 != tmp2.size()) {
+                XLOG(ERROR) << "emitProb illegal.";
+                return false;
+            }
+
+            if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
+                XLOG(ERROR) << "TransCode failed.";
+                return false;
+            }
+#ifdef USE_CEDAR_SEGMENT
+            char str_key[8];
+            snprintf(str_key, sizeof(str_key), "%d", unicode[0]);
+            mp.update(str_key, std::strlen(str_key), atof(tmp2[1].c_str()));
+#else
+            mp[unicode[0]] = atof(tmp2[1].c_str());
+#endif
+        }
+
+        return true;
+    }
+
+    char statMap[STATUS_SUM];                 // state index -> 'B', 'E', 'M', 'S'
+    double startProb[STATUS_SUM];             // start probability per state
+    double transProb[STATUS_SUM][STATUS_SUM]; // transProb[from][to]
+    EmitProbMap emitProbB;
+    EmitProbMap emitProbE;
+    EmitProbMap emitProbM;
+    EmitProbMap emitProbS;
+    vector<EmitProbMap* > emitProbVec;        // emission tables indexed by state
+}; // struct HMMModel
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@ -0,0 +1,206 @@
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <memory.h>
+#include <cassert>
+#include "HMMModel.hpp"
+#include "SegmentBase.hpp"
+
+namespace cppjieba {
+
+const double MIN_DOUBLE = -3.14e+100;
+
+/**
+ * Segmenter that splits non-ASCII runs with a Viterbi decode over an
+ * HMMModel and groups ASCII letters/digits with simple rules.
+ */
+class HMMSegment: public SegmentBase {
+public:
+    // model is not owned; it is dereferenced by Cut.
+    HMMSegment(const HMMModel* model)
+        : model_(model) {
+    }
+    ~HMMSegment() { }
+
+    /**
+     * Appends the segmentation of [begin, end) to res. Runs of runes >= 0x80
+     * are decoded with the HMM; each ASCII position starts either a
+     * letter/digit word, a number, or a single-rune word.
+     */
+    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
+                     size_t) const override {
+        RuneStrArray::const_iterator left = begin;
+        RuneStrArray::const_iterator right = begin;
+
+        while (right != end) {
+            if (right->rune < 0x80) { // ASCII
+                // Flush the pending non-ASCII run first.
+                if (left != right) {
+                    InternalCut(left, right, res);
+                }
+
+                left = right;
+
+                do {
+                    // Returns left when left is not a letter, otherwise the end of the letter/digit run.
+                    right = SequentialLetterRule(left, end);
+
+                    if (right != left) {
+                        break;
+                    }
+
+                    // Returns left when left is not a digit, otherwise the end of the number.
+                    right = NumbersRule(left, end);
+
+                    if (right != left) {
+                        break;
+                    }
+
+                    right ++;
+                } while (false);
+
+                WordRange wr(left, right - 1);
+                res.push_back(wr);
+                left = right;
+            } else {
+                right++;
+            }
+        }
+
+        if (left != right) {
+            InternalCut(left, right, res);
+        }
+    }
+
+    // Not implemented for this segmenter: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t) const override {
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
+    }
+    // Not implemented for this segmenter: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
+    }
+private:
+    // Sequential letters rule: if *begin is an ASCII letter, returns the
+    // position after the following run of ASCII letters and digits; otherwise
+    // returns begin. begin must be dereferenceable.
+    RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
+                                                      RuneStrArray::const_iterator end) const {
+        Rune x = begin->rune;
+
+        if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
+            begin ++;
+        } else {
+            return begin;
+        }
+
+        while (begin != end) {
+            x = begin->rune;
+
+            if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
+                begin ++;
+            } else {
+                break;
+            }
+        }
+
+        return begin;
+    }
+    // Numbers rule: if *begin is an ASCII digit, returns the position after
+    // the following run of digits and '.'; otherwise returns begin.
+    // begin must be dereferenceable.
+    RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
+        Rune x = begin->rune;
+
+        if ('0' <= x && x <= '9') {
+            begin ++;
+        } else {
+            return begin;
+        }
+
+        while (begin != end) {
+            x = begin->rune;
+
+            if (('0' <= x && x <= '9') || x == '.') {
+                begin++;
+            } else {
+                break;
+            }
+        }
+
+        return begin;
+    }
+    // Decodes [begin, end) with Viterbi and emits a word at every position
+    // whose state is E or S.
+    void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
+        vector<size_t> status;
+        Viterbi(begin, end, status);
+
+        RuneStrArray::const_iterator left = begin;
+        RuneStrArray::const_iterator right;
+
+        for (size_t i = 0; i < status.size(); i++) {
+            if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
+                right = begin + i + 1;
+                WordRange wr(left, right - 1);
+                res.push_back(wr);
+                left = right;
+            }
+        }
+    }
+
+    /**
+     * Computes the most probable state sequence for [begin, end) and stores
+     * one state per rune in status.
+     *
+     * The score and back-pointer tables are laid out as [state * X + position].
+     * They live in vectors rather than variable-length arrays: VLAs are not
+     * standard C++ and put 12 * 4 * X bytes on the stack, which can overflow
+     * it for long inputs. An empty range yields an empty status instead of
+     * dereferencing end.
+     */
+    void Viterbi(RuneStrArray::const_iterator begin,
+                 RuneStrArray::const_iterator end,
+                 vector<size_t>& status) const {
+        size_t Y = HMMModel::STATUS_SUM;
+        size_t X = end - begin;
+
+        if (0 == X) {
+            status.clear();
+            return;
+        }
+
+        size_t XYSize = X * Y;
+        size_t now, old, stat;
+        double tmp, endE, endS;
+
+        vector<int> path(XYSize);
+        vector<double> weight(XYSize);
+
+        //start
+        for (size_t y = 0; y < Y; y++) {
+            weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
+            path[0 + y * X] = -1;
+        }
+
+        double emitProb;
+
+        for (size_t x = 1; x < X; x++) {
+            for (size_t y = 0; y < Y; y++) {
+                now = x + y * X;
+                weight[now] = MIN_DOUBLE;
+                path[now] = HMMModel::E; // warning
+                emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
+
+                for (size_t preY = 0; preY < Y; preY++) {
+                    old = x - 1 + preY * X;
+                    tmp = weight[old] + model_->transProb[preY][y] + emitProb;
+
+                    if (tmp > weight[now]) {
+                        weight[now] = tmp;
+                        path[now] = preY;
+                    }
+                }
+            }
+        }
+
+        // A valid sequence must finish in state E or S.
+        endE = weight[X - 1 + HMMModel::E * X];
+        endS = weight[X - 1 + HMMModel::S * X];
+        stat = 0;
+
+        if (endE >= endS) {
+            stat = HMMModel::E;
+        } else {
+            stat = HMMModel::S;
+        }
+
+        status.resize(X);
+
+        // Walk the back-pointers from the last position to the first.
+        for (size_t x = X; x-- > 0;) {
+            status[x] = stat;
+            stat = path[x + stat * X];
+        }
+    }
+
+    const HMMModel* model_;
+}; // class HMMSegment
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/IdfTrie.hpp
+++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp
@ -0,0 +1,117 @@
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <cmath>
+#include <limits>
+#include "limonp/StringUtil.hpp"
+#include "limonp/Logging.hpp"
+#include "Unicode.hpp"
+#include "DatTrie.hpp"
+#include <QDebug>
+namespace cppjieba {
+
+using namespace limonp;
+
+// Number of space-separated fields required on each IDF dictionary line (word, idf value).
+const size_t IDF_COLUMN_NUM = 2;
+
+/**
+ * Word -> IDF lookup backed by a DatTrie.
+ *
+ * On construction the trie is attached from a cache file when InitIdfAttachDat accepts it;
+ * otherwise the text dictionary is parsed and a new cache file is built.
+ */
+class IdfTrie {
+public:
+    enum UserWordWeightOption {
+        WordWeightMin,
+        WordWeightMedian,
+        WordWeightMax,
+    }; // enum UserWordWeightOption
+
+    /**
+     * @param dict_path            path of the text IDF dictionary ("word idf" per line)
+     * @param dat_cache_path       cache file prefix; when empty a path under /tmp is used
+     * @param user_word_weight_opt accepted for interface compatibility; not read by Init
+     */
+    IdfTrie(const string& dict_path, const string & dat_cache_path = "",
+             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
+        Init(dict_path, dat_cache_path, user_word_weight_opt);
+    }
+
+    ~IdfTrie() {}
+
+    /// Forwards the lookup to the underlying DatTrie and returns its result unchanged.
+    double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
+        return dat_.Find(word, length, node_pos);
+    }
+
+    /// Size sum reported by CalcFileListMD5 for dict_path during Init.
+    size_t GetTotalDictSize() const {
+        return total_dict_size_;
+    }
+
+private:
+    /**
+     * Attaches the cached trie if possible, otherwise loads the dictionary, computes the
+     * average IDF and builds the cache.
+     */
+    void Init(const string& dict_path, string dat_cache_path,
+              UserWordWeightOption user_word_weight_opt) {
+        size_t file_size_sum = 0;
+        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
+        total_dict_size_ = file_size_sum;
+
+        if (dat_cache_path.empty()) {
+            // When no cache location is given, default to the tmp directory.
+            // NOTE(review): this is a predictable file name in a world-writable directory;
+            // confirm that is acceptable for the deployment.
+            dat_cache_path = "/tmp/" + md5 + ".dat_";
+        }
+        dat_cache_path += VERSION;
+        QString path = QString::fromStdString(dat_cache_path);
+        qDebug() << "#########Idf path:" << path;
+        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
+            // Cache hit: the dictionary is not parsed, so idfAverage_ keeps its 0.0 default.
+            return;
+        }
+
+        LoadDefaultIdf(dict_path);
+        const double idf_sum = CalcIdfSum(static_node_infos_);
+        assert(static_node_infos_.size());
+        // Guard the division so that a build with asserts disabled does not produce 0/0
+        // when the dictionary could not be read.
+        if (!static_node_infos_.empty()) {
+            idfAverage_ = idf_sum / static_node_infos_.size();
+        }
+        assert(idfAverage_ > 0.0);
+        double min_weight = 0;
+        dat_.SetMinWeight(min_weight);
+
+        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
+        assert(build_ret);
+        // Swap with an empty vector to release the parsed entries' memory.
+        vector<IdfElement>().swap(static_node_infos_);
+    }
+
+    /**
+     * Parses filePath into static_node_infos_. An unopenable file is silently ignored;
+     * empty lines are logged and skipped; any other line must have IDF_COLUMN_NUM fields.
+     */
+    void LoadDefaultIdf(const string& filePath) {
+        ifstream ifs(filePath.c_str());
+        if(not ifs.is_open()){
+            return ;
+        }
+        string line;
+        vector<string> buf;
+        size_t lineno = 0;
+
+        for (; getline(ifs, line); lineno++) {
+            if (line.empty()) {
+                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
+                continue;
+            }
+            Split(line, buf, " ");
+            XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
+            IdfElement node_info;
+            node_info.word = buf[0];
+            node_info.idf = atof(buf[1].c_str());
+            static_node_infos_.push_back(node_info);
+        }
+    }
+
+    /// Sum of the idf field over all entries.
+    double CalcIdfSum(const vector<IdfElement>& node_infos) const {
+        double sum = 0.0;
+
+        for (size_t i = 0; i < node_infos.size(); i++) {
+            sum += node_infos[i].idf;
+        }
+
+        return sum;
+    }
+public:
+    // Mean IDF of the dictionary entries. Only computed when the trie is built from the
+    // text dictionary; stays 0.0 when the cache file was attached instead.
+    double idfAverage_ = 0.0;
+private:
+    vector<IdfElement> static_node_infos_;
+    size_t total_dict_size_ = 0;
+    DatTrie dat_;
+};
+}
+
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@ -0,0 +1,99 @@
+#pragma once
+
+#include <memory>
+#include "QuerySegment.hpp"
+#include "KeywordExtractor.hpp"
+#include "segment-trie/segment-trie.h"
+
+namespace cppjieba {
+
+/**
+ * Facade that owns one dictionary trie and one HMM model and exposes the segmenters and
+ * the keyword extractor built on top of them.
+ */
+class Jieba {
+public:
+    /**
+     * Constructs the dictionary trie and HMM model first, then every segmenter, each of
+     * which receives pointers to those two members.
+     */
+    Jieba(const string& dict_path,
+          const string& model_path,
+          const string& user_dict_path,
+          const string& idfPath = "",
+          const string& stopWordPath = "",
+          const string& dat_cache_path = "")
+        : dict_trie_(dict_path, user_dict_path, dat_cache_path)
+        , model_(model_path)
+        , mp_seg_(&dict_trie_)
+        , hmm_seg_(&model_)
+        , mix_seg_(&dict_trie_, &model_, stopWordPath)
+        , full_seg_(&dict_trie_)
+        , query_seg_(&dict_trie_, &model_, stopWordPath)
+        , extractor(&dict_trie_, &model_, idfPath, dat_cache_path, stopWordPath)
+    {
+    }
+
+    ~Jieba() = default;
+
+    // --- MixSegment ---------------------------------------------------------------------
+    void Cut(const string& sentence, vector<string>& words, bool hmm = true) const
+    {
+        mix_seg_.CutToStr(sentence, words, hmm);
+    }
+
+    void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const
+    {
+        mix_seg_.CutToWord(sentence, words, hmm);
+    }
+
+    // --- FullSegment --------------------------------------------------------------------
+    void CutAll(const string& sentence, vector<string>& words) const
+    {
+        full_seg_.CutToStr(sentence, words);
+    }
+
+    void CutAll(const string& sentence, vector<Word>& words) const
+    {
+        full_seg_.CutToWord(sentence, words);
+    }
+
+    // --- QuerySegment -------------------------------------------------------------------
+    void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const
+    {
+        query_seg_.CutToStr(sentence, words, hmm);
+    }
+
+    void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const
+    {
+        query_seg_.CutToWord(sentence, words, hmm);
+    }
+
+    // --- HMMSegment ---------------------------------------------------------------------
+    void CutHMM(const string& sentence, vector<string>& words) const
+    {
+        hmm_seg_.CutToStr(sentence, words);
+    }
+
+    void CutHMM(const string& sentence, vector<Word>& words) const
+    {
+        hmm_seg_.CutToWord(sentence, words);
+    }
+
+    // --- MPSegment, always called with hmm == false and a caller-supplied length cap ----
+    void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const
+    {
+        mp_seg_.CutToStr(sentence, words, false, max_word_len);
+    }
+
+    void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const
+    {
+        mp_seg_.CutToWord(sentence, words, false, max_word_len);
+    }
+
+    /// Appends (word, tag) pairs for the sentence, as produced by the mix segmenter.
+    void Tag(const string& sentence, vector<pair<string, string> >& words) const
+    {
+        mix_seg_.Tag(sentence, words);
+    }
+
+    /// Tag of a single word, looked up through the mix segmenter.
+    string LookupTag(const string &str) const
+    {
+        return mix_seg_.LookupTag(str);
+    }
+
+    /// Applies the same separator set to every segmenter owned by this object.
+    void ResetSeparators(const string& s)
+    {
+        //TODO
+        mp_seg_.ResetSeparators(s);
+        hmm_seg_.ResetSeparators(s);
+        mix_seg_.ResetSeparators(s);
+        full_seg_.ResetSeparators(s);
+        query_seg_.ResetSeparators(s);
+    }
+
+    /// Non-owning pointer to the dictionary trie held by this object.
+    const DictTrie* GetDictTrie() const
+    {
+        return &dict_trie_;
+    }
+
+    /// Non-owning pointer to the HMM model held by this object.
+    const HMMModel* GetHMMModel() const
+    {
+        return &model_;
+    }
+
+private:
+    // Declared first: the segmenters below are constructed with pointers to these two.
+    DictTrie dict_trie_;
+    HMMModel model_;
+
+    // All segmenters share the dictionary trie and the model above.
+    MPSegment mp_seg_;
+    HMMSegment hmm_seg_;
+    MixSegment mix_seg_;
+    FullSegment full_seg_;
+    QuerySegment query_seg_;
+
+public:
+    KeywordExtractor extractor;
+}; // class Jieba
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@ -0,0 +1,100 @@
+#pragma once
+
+#include <cmath>
+#include "MixSegment.hpp"
+//#include "IdfTrie.hpp"
+#include "idf-trie/idf-trie.h"
+
+namespace cppjieba {
+
+using namespace limonp;
+using namespace std;
+
+/*utf8*/
+/**
+ * Extracts the top-N keywords of a sentence: words are produced by a MixSegment, each
+ * word's accumulated weight is multiplied by its IDF, and the heaviest words are kept.
+ */
+class KeywordExtractor {
+public:
+
+    /**
+     * @param dictTrie       dictionary trie handed to the internal MixSegment
+     * @param model          HMM model handed to the internal MixSegment
+     * @param idfPath        IDF dictionary path handed to the IdfTrie
+     * @param dat_cache_path cache path handed to the IdfTrie
+     * @param stopWordPath   stop-word file handed to the internal MixSegment
+     */
+    KeywordExtractor(const DictTrie* dictTrie,
+                     const HMMModel* model,
+                     const string& idfPath,
+                     const string& dat_cache_path,
+                     const string& stopWordPath)
+        : segment_(dictTrie, model, stopWordPath),
+          idf_trie_(idfPath, dat_cache_path){
+    }
+    ~KeywordExtractor() {
+    }
+
+    /// Appends the words of the top-N keywords to keywords (existing content is kept).
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+        vector<KeyWord> topWords;
+        Extract(sentence, topWords, topN);
+
+        keywords.reserve(keywords.size() + topWords.size());
+        for (size_t i = 0; i < topWords.size(); i++) {
+            keywords.push_back(topWords[i].word);
+        }
+    }
+
+    /// Appends (word, weight) pairs of the top-N keywords to keywords (existing content is kept).
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+        vector<KeyWord> topWords;
+        Extract(sentence, topWords, topN);
+
+        keywords.reserve(keywords.size() + topWords.size());
+        for (size_t i = 0; i < topWords.size(); i++) {
+            keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+        }
+    }
+
+    /**
+     * Replaces keywords with at most topN entries ordered by descending weight.
+     *
+     * @param sentence text to analyse
+     * @param keywords output; cleared first
+     * @param topN     maximum number of keywords to keep
+     */
+    void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
+
+        // Map from word to its KeyWord record; repeated words accumulate into one entry.
+        unordered_map<string, KeyWord> wordmap;
+        PreFilter pre_filter(symbols_, sentence);
+        // Value-initialised so that copying it into the WordRange below does not read an
+        // indeterminate iterator; Next() overwrites the range before it is used.
+        RuneStrArray::const_iterator null_p = RuneStrArray::const_iterator();
+        WordRange range(null_p, null_p);
+        bool isNull(false);
+        while (pre_filter.Next(range, isNull)) {
+            if (isNull) {
+                continue;
+            }
+            segment_.CutToStr(sentence, range,  wordmap);
+        }
+
+        keywords.clear();
+        keywords.reserve(wordmap.size());
+
+        for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+            double idf = idf_trie_.Find(itr->first);
+            if (-1 != idf) {// IDF dictionary lookup; -1 is treated as "not found"
+                itr->second.weight *= idf;
+            } else {
+                itr->second.weight *= idf_trie_.GetIdfAverage();
+            }
+
+            itr->second.word = itr->first;
+            keywords.push_back(itr->second);
+        }
+
+        // Only the first topN positions need to be ordered.
+        topN = min(topN, keywords.size());
+        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+        keywords.resize(topN);
+    }
+private:
+
+    /// Orders keywords by descending weight.
+    static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
+        return lhs.weight > rhs.weight;
+    }
+
+    MixSegment segment_;
+    IdfTrie idf_trie_;
+
+
+    // Passed to PreFilter; nothing in this class inserts into it.
+    unordered_set<Rune> symbols_;
+}; // class KeywordExtractor
+
+/// Writes a KeyWord as a JSON-like object with "word", "offset" and "weight" fields.
+inline ostream& operator << (ostream& os, const KeyWord& word) {
+    os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets;
+    os << ", \"weight\": " << word.weight << "}";
+    return os;
+}
+
+} // namespace cppjieba
+
+
+
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@ -0,0 +1,133 @@
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
+#include "SegmentTagged.hpp"
+#include "PosTagger.hpp"
+
+namespace cppjieba {
+
+/**
+ * Dictionary-only segmenter: cutting is delegated entirely to DictTrie::FindWordRange.
+ * The CutWithSentence overloads are intentionally empty in this class.
+ */
+class MPSegment: public SegmentTagged {
+public:
+    /// @param dictTrie dictionary trie to cut with; must not be null (asserted).
+    MPSegment(const DictTrie* dictTrie)
+        : dictTrie_(dictTrie) {
+        assert(dictTrie_ != nullptr);
+    }
+    ~MPSegment() = default;
+
+    /// Fills words with the ranges found by the dictionary trie; the bool flag is unused.
+    virtual void Cut(RuneStrArray::const_iterator begin,
+                     RuneStrArray::const_iterator end,
+                     vector<WordRange>& words,
+                     bool, size_t max_word_len) const override {
+        dictTrie_->FindWordRange(begin, end, words, max_word_len);
+    }
+
+    /// No-op: every argument is deliberately ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t) const override {
+        static_cast<void>(s);
+        static_cast<void>(begin);
+        static_cast<void>(end);
+        static_cast<void>(res);
+        static_cast<void>(hmm);
+    }
+    /// No-op: every argument is deliberately ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        static_cast<void>(s);
+        static_cast<void>(begin);
+        static_cast<void>(end);
+        static_cast<void>(res);
+        static_cast<void>(hmm);
+    }
+    /// Non-owning pointer to the dictionary trie supplied at construction.
+    const DictTrie* GetDictTrie() const override {
+        return dictTrie_;
+    }
+
+    /// Tags src through the PosTagger, using this segmenter for the cutting step.
+    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
+        return tagger_.Tag(src, res, *this);
+    }
+
+    /// Forwards to DictTrie::IsUserDictSingleChineseWord.
+    bool IsUserDictSingleChineseWord(const Rune& value) const {
+        return dictTrie_->IsUserDictSingleChineseWord(value);
+    }
+private:
+/*
+    void CalcDP(vector<DatDag>& dags) const {
+        double val(0);
+        for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
+            rit->max_next = -1;
+            rit->max_weight = MIN_DOUBLE;
+
+            for (const auto & it : rit->nexts) {
+                const auto nextPos = it.first;
+                val = dictTrie_->GetMinWeight();
+
+                if (nullptr != it.second) {
+                    val = it.second->weight;
+                }
+
+                if (nextPos  < dags.size()) {
+                    val += dags[nextPos].max_weight;
+                }
+
+                if ((nextPos <= dags.size()) && (val > rit->max_weight)) {
+                    rit->max_weight = val;
+                    rit->max_next = nextPos;
+                }
+            }
+        }
+    }
+*/
+/*  CalcDP rewritten to walk the DAG backwards by index; preliminary testing found no problems. */
+/*
+    void CalcDP(vector<DatDag>& dags) const {
+        double val(0);
+        size_t size = dags.size();
+
+        for (size_t i = 0; i < size; i++) {
+            dags[size - 1 - i].max_next = -1;
+            dags[size - 1 - i].max_weight = MIN_DOUBLE;
+
+            for (const auto & it : dags[size - 1 - i].nexts) {
+                const auto nextPos = it.first;
+                if (nullptr != it.second) {
+                    val = it.second->weight;
+                }
+
+                if (nextPos  < dags.size()) {
+                    val += dags[nextPos].max_weight;
+                }
+
+                if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) {
+                    dags[size - 1 - i].max_weight = val;
+                    dags[size - 1 - i].max_next = nextPos;
+                }
+            }
+        }
+    }
+
+    void CutByDag(RuneStrArray::const_iterator begin,
+                  RuneStrArray::const_iterator,
+                  const vector<DatDag>& dags,
+                  vector<WordRange>& words) const {
+
+        for (size_t i = 0; i < dags.size();) {
+            const auto next = dags[i].max_next;
+            assert(next > i);
+            assert(next <= dags.size());
+            WordRange wr(begin + i, begin + next - 1);
+            words.push_back(wr);
+            i = next;
+        }
+    }
+*/// The functionality above has been folded into the Find function.
+    const DictTrie* dictTrie_;
+    PosTagger tagger_;
+
+}; // class MPSegment
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@ -0,0 +1,276 @@
+#pragma once
+
+#include <cassert>
+#include "MPSegment.hpp"
+#include "HMMSegment.hpp"
+#include "limonp/StringUtil.hpp"
+#include "PosTagger.hpp"
+// Store stop words in a cedar trie instead of an unordered_set. Author's note: preliminary
+// testing showed roughly a 3%-5% speedup, with no noticeable reduction in memory use.
+#define STOP_WORDS_USE_CEDAR_SEGMENT
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+#include "cedar/cedar.h"
+#endif
+
+namespace cppjieba {
+/**
+ * Segmenter that first cuts with the dictionary (MPSegment) and then re-cuts runs of
+ * single-rune results that are not user-dictionary words with the HMM segmenter.
+ * Also owns the stop-word set used by the keyword-oriented CutWithSentence overload.
+ */
+class MixSegment: public SegmentTagged {
+public:
+    /**
+     * @param dictTrie     dictionary trie for the internal MPSegment
+     * @param model        HMM model for the internal HMMSegment
+     * @param stopWordPath stop-word file, one word per line; an unopenable path is ignored
+     */
+    MixSegment(const DictTrie* dictTrie,
+               const HMMModel* model,
+               const string& stopWordPath)
+        : mpSeg_(dictTrie), hmmSeg_(model) {
+        LoadStopWordDict(stopWordPath);
+    }
+    ~MixSegment() {}
+
+    /**
+     * Appends word ranges for [begin, end) to res.
+     * With hmm == false the dictionary result is used as is; otherwise every maximal run
+     * of single-rune, non-user-dictionary results is re-cut by the HMM segmenter.
+     */
+    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
+                     size_t) const override {
+        if (!hmm) {
+            mpSeg_.CutRuneArray(begin, end, res);
+            return;
+        }
+
+        vector<WordRange> words;
+        assert(end >= begin);
+        words.reserve(end - begin);
+        mpSeg_.CutRuneArray(begin, end, words);
+
+        vector<WordRange> hmmRes;
+        hmmRes.reserve(end - begin);
+
+        for (size_t i = 0; i < words.size(); i++) {
+            //if mp Get a word, it's ok, put it into result
+            if (words[i].left != words[i].right || (words[i].left == words[i].right &&
+                                                    mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
+                res.push_back(words[i]);
+                continue;
+            }
+
+            // if mp Get a single one and it is not in userdict, collect it in sequence
+            size_t j = i;
+
+            while (j < words.size() && words[j].left == words[j].right &&
+                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
+                j++;
+            }
+
+            // Cut the sequence with hmm
+            assert(j - 1 >= i);
+            // TODO
+            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
+
+            //put hmm result to result
+            for (size_t k = 0; k < hmmRes.size(); k++) {
+                res.push_back(hmmRes[k]);
+            }
+
+            //clear tmp vars
+            hmmRes.clear();
+
+            //let i jump over this piece
+            i = j - 1;
+        }
+    }
+
+    /**
+     * Appends the words of [begin, end) to res as substrings of s.
+     * The hmm flag is ignored; the HMM pass always runs. The last dictionary result is
+     * always pushed directly and never takes part in an HMM run.
+     */
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t) const override {
+        // HMM is currently always enabled; revisit if it ever has to be switchable --jxx20210519
+//        if (!hmm) {
+//            mpSeg_.CutRuneArray(begin, end, res);
+//            return;
+//        }
+        std::ignore = hmm;
+        vector<WordRange> words;
+        assert(end >= begin);
+        words.reserve(end - begin);
+        mpSeg_.CutRuneArray(begin, end, words);
+
+        vector<WordRange> hmmRes;
+        hmmRes.reserve(end - begin);
+
+        for (size_t i = 0; i < words.size(); i++) {
+            //if mp Get a word, it's ok, put it into result
+            if (words[i].left != words[i].right) {
+                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
+                continue;
+            }
+            if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
+                    || i == (words.size() - 1)) {// the last entry is pushed directly
+                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
+                continue;
+            }
+
+            // if mp Get a single one and it is not in userdict, collect it in sequence
+            size_t j = i + 1; // entry i is a lone rune outside the user dict and not the last entry, so start checking at j
+
+            while (j < (words.size() - 1) && words[j].left == words[j].right &&
+                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
+                j++;
+            }
+
+            // Cut the sequence with hmm
+            assert(j - 1 >= i);
+            // TODO
+            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
+
+            //put hmm result to result
+            for (size_t k = 0; k < hmmRes.size(); k++) {
+                res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
+            }
+
+            //clear tmp vars
+            hmmRes.clear();
+
+            //let i jump over this piece
+            i = j - 1;
+        }
+    }
+
+    /**
+     * Keyword-oriented cut: accumulates each non-stop word of [begin, end) into res,
+     * recording its offset and adding 1.0 to its weight per occurrence.
+     * Dictionary cutting is only attempted when the first rune is 3 or 4 bytes long;
+     * otherwise every rune is recorded as its own entry. The hmm flag is ignored.
+     */
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        std::ignore = hmm;
+        vector<WordRange> words;
+        vector<WordRange> hmmRes;
+        assert(end >= begin);
+        // An empty range contributes nothing; return before begin is dereferenced below.
+        if (begin == end) {
+            return;
+        }
+        if (3 == begin->len or 4 == begin->len) {
+            words.reserve(end - begin);
+            mpSeg_.CutRuneArray(begin, end, words);
+            hmmRes.reserve(words.size());
+        } else {
+            hmmRes.reserve(end - begin);
+        }
+
+        if (words.size() != 0) {// dictionary segmentation produced results
+            for (size_t i = 0; i < words.size(); i++) {
+
+                string str = GetStringFromRunes(s, words[i].left, words[i].right);
+
+                if (words[i].left != words[i].right) {
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(str.c_str(), str.size())) {
+                        continue;
+                    }
+#else
+                    if (stopWords_.find(str) != stopWords_.end()) {
+                        continue;
+                    }
+#endif
+                    res[str].offsets.push_back(words[i].left->offset);
+                    res[str].weight += 1.0;
+                    continue;
+                }
+
+                if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
+                        || i == (words.size() - 1)) {// the last entry is recorded directly
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(str.c_str(), str.size())) {
+                        continue;
+                    }
+#else
+                    if (stopWords_.find(str) != stopWords_.end()) {
+                        continue;
+                    }
+#endif
+                    res[str].offsets.push_back(words[i].left->offset);
+                    res[str].weight += 1.0;
+                    continue;
+                }
+                // if mp Get a single one and it is not in userdict, collect it in sequence
+                size_t j = i + 1; // entry i is a lone rune outside the user dict and not the last entry, so start checking at j
+                bool isLastWordsSingle(false);
+                while (j <= (words.size() - 1)
+                       && words[j].left == words[j].right
+                       && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
+                    if (j == (words.size() - 1)) {// the final segmentation result is a single rune
+                        isLastWordsSingle = true;
+                        break;
+                    }
+                    j++;
+                }
+
+                // Cut the sequence with hmm
+                assert(j - 1 >= i);
+                // TODO
+                // When the run reaches the final entry, entry j belongs to the run (end is
+                // one past it); otherwise entry j is the first one outside the run.
+                if (isLastWordsSingle) {
+                    hmmSeg_.CutRuneArray(words[i].left, words[j].left + 1, hmmRes);
+                } else {
+                    hmmSeg_.CutRuneArray(words[i].left, words[j].left, hmmRes);
+                }
+
+                //put hmm result to result
+                for (size_t k = 0; k < hmmRes.size(); k++) {
+                    string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+                    if (0 < stopWords_.exactMatchSearch<int>(hmmStr.c_str(), hmmStr.size())) {
+                        continue;
+                    }
+#else
+                    if (/*IsSingleWord(hmmStr) || */stopWords_.find(hmmStr) != stopWords_.end()) {
+                        continue;
+                    }
+#endif
+
+                    res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
+                    res[hmmStr].weight += 1.0;
+                }
+
+                //clear tmp vars
+                hmmRes.clear();
+
+                //let i jump over this piece
+                if (isLastWordsSingle) {
+                    break;
+                }
+                i = j - 1;
+            }
+        } else {// no dictionary segmentation results: record every rune on its own
+            for (size_t i = 0; i < (size_t)(end - begin); i++) {
+                string str = s.substr((begin+i)->offset, (begin+i)->len);
+                res[str].offsets.push_back((begin+i)->offset);
+                res[str].weight += 1.0;
+            }
+        }
+    }
+
+    /// Dictionary trie of the internal MPSegment.
+    const DictTrie* GetDictTrie() const override {
+        return mpSeg_.GetDictTrie();
+    }
+
+    /// Tags src through the PosTagger, using this segmenter for the cutting step.
+    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
+        return tagger_.Tag(src, res, *this);
+    }
+
+    /// Tag of a single word, resolved through the PosTagger.
+    string LookupTag(const string &str) const {
+        return tagger_.LookupTag(str, *this);
+    }
+
+    /**
+     * Adds every line of filePath to the stop-word set.
+     * A file that cannot be opened is silently ignored, leaving the set unchanged.
+     */
+    void LoadStopWordDict(const string& filePath) {
+        ifstream ifs(filePath.c_str());
+        if(not ifs.is_open()){
+            return ;
+        }
+        string line ;
+
+        while (getline(ifs, line)) {
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+            stopWords_.update(line.c_str(), line.size(), 1);
+#else
+            stopWords_.insert(line);
+#endif
+        }
+
+        assert(stopWords_.size());
+    }
+private:
+#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
+    cedar::da<int, -1, -2, false> stopWords_;
+#else
+    unordered_set<string> stopWords_;
+#endif
+    MPSegment mpSeg_;
+    HMMSegment hmmSeg_;
+    PosTagger tagger_;
+
+}; // class MixSegment
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/PinYinTrie.hpp
+++ b/libchinese-segmentation/cppjieba/PinYinTrie.hpp
@ -0,0 +1,154 @@
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <cmath>
+#include <limits>
+#include "limonp/StringUtil.hpp"
+#include "limonp/Logging.hpp"
+#include "Unicode.hpp"
+#include "DatTrie.hpp"
+#include <QDebug>
+namespace cppjieba {
+
+using namespace limonp;
+
+// Field count of a single-reading dictionary line (pinyin, then word). Lines with any
+// other field count are loaded as polyphonic entries.
+const size_t PINYIN_COLUMN_NUM = 2;
+
+/**
+ * Chinese word -> pinyin lookup.
+ *
+ * Single-reading entries live in a DatTrie (attached from a cache file or built from the
+ * text dictionary); polyphonic entries are always parsed from the text dictionary into an
+ * in-memory QMap.
+ */
+class PinYinTrie {
+public:
+    enum UserWordWeightOption {
+        WordWeightMin,
+        WordWeightMedian,
+        WordWeightMax,
+    }; // enum UserWordWeightOption
+
+    /**
+     * @param dict_path            path of the text pinyin dictionary
+     * @param dat_cache_path       cache file path; when empty a path under /tmp is used
+     * @param user_word_weight_opt only used as part of the default cache file name
+     */
+    PinYinTrie(const string& dict_path, const string & dat_cache_path = "",
+             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
+        Init(dict_path, dat_cache_path, user_word_weight_opt);
+    }
+
+    ~PinYinTrie() {}
+
+    /**
+     * Appends every reading of a polyphonic word to results.
+     * @return 0 when word is a polyphonic entry, -1 otherwise (results untouched)
+     */
+    int getMultiTonResults(string word, QStringList &results) {
+        // One const lookup: avoids both the second search and the detach/insert that the
+        // non-const operator[] can cause.
+        const auto entry = qmap_chinese2pinyin.constFind(QString::fromStdString(word));
+        if (entry == qmap_chinese2pinyin.constEnd()) {
+            return -1;
+        }
+        for (const QString& pinyin : entry.value()) {
+            results.push_back(pinyin);
+        }
+        return 0;
+    }
+
+    /**
+     * Looks word up in the single-reading trie.
+     * @return 0 and the reading in result when found, -1 otherwise (result untouched)
+     */
+    int getSingleTonResult(string word, QString &result) {
+        const PinYinMemElem * tmp = dat_.PinYinFind(word);
+        if (tmp) {
+            result = QString::fromStdString(tmp->GetTag());
+            return 0;
+        }
+        return -1;
+    }
+
+    /**
+     * @return true when word is known either as a polyphonic entry or in the
+     *         single-reading trie
+     */
+    bool contains(string &word) {
+        // The trie test used to be negated, which reported unknown words as contained.
+        return qmap_chinese2pinyin.contains(QString::fromStdString(word))
+               || dat_.PinYinFind(word) != nullptr;
+    }
+
+    /// @return true when word is a polyphonic entry.
+    bool isMultiTone(const string &word) {
+        return qmap_chinese2pinyin.contains(QString::fromStdString(word));
+    }
+
+    /// Size sum reported by CalcFileListMD5 for dict_path during Init.
+    size_t GetTotalDictSize() const {
+        return total_dict_size_;
+    }
+
+private:
+    /**
+     * Attaches the cached trie if possible, otherwise builds it from the dictionary.
+     * The polyphonic map is filled from the text dictionary on both paths.
+     */
+    void Init(const string& dict_path, string dat_cache_path,
+              UserWordWeightOption user_word_weight_opt) {
+        size_t file_size_sum = 0;
+        vector<PinYinElement> node_infos;
+        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
+        total_dict_size_ = file_size_sum;
+
+        if (dat_cache_path.empty()) {
+            // When no cache location is given, default to the tmp directory --jxx20200519
+            // NOTE(review): this is a predictable file name in a world-writable directory;
+            // confirm that is acceptable for the deployment.
+            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
+        }
+        QString path = QString::fromStdString(dat_cache_path);
+        qDebug() << "#########PinYin path:" << path << file_size_sum;
+        if (dat_.InitPinYinAttachDat(dat_cache_path, md5)) {
+            // Polyphonic entries are not in the cache, so the file is still scanned for them.
+            LoadDefaultPinYin(node_infos, dict_path, true);
+            return;
+        }
+
+        LoadDefaultPinYin(node_infos, dict_path, false);
+        double min_weight = 0;
+        dat_.SetMinWeight(min_weight);
+
+        const auto build_ret = dat_.InitBuildDat(node_infos, dat_cache_path, md5);
+        assert(build_ret);
+        // Swap with an empty vector to release the parsed entries' memory.
+        vector<PinYinElement>().swap(node_infos);
+    }
+
+    /**
+     * Parses the text dictionary.
+     *
+     * @param node_infos receives the single-reading entries (unless multiFlag is set)
+     * @param filePath   dictionary path; an unopenable file is silently ignored
+     * @param multiFlag  when true only polyphonic lines are processed
+     */
+    void LoadDefaultPinYin(vector<PinYinElement> &node_infos, const string& filePath, bool multiFlag) {
+        ifstream ifs(filePath.c_str());
+        if(not ifs.is_open()){
+            return ;
+        }
+        string line;
+        vector<string> buf;
+        size_t lineno = 0;
+
+        for (; getline(ifs, line); lineno++) {
+            if (line.empty()) {
+                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
+                continue;
+            }
+            Split(line, buf, " ");
+            if (buf.size() == PINYIN_COLUMN_NUM) {
+                if (multiFlag) {// single-reading entry: not wanted in this mode
+                    continue;
+                }
+                PinYinElement node_info;
+                node_info.word = buf[1];
+                node_info.tag = buf[0];
+                node_infos.push_back(node_info);
+            } else {// polyphonic entry: readings first, the word itself as the last field
+                const QString content = QString::fromUtf8(line.c_str());
+                // Split once; the last field is the key and the rest are the readings.
+                QStringList readings = content.split(" ");
+                const QString key = readings.last().trimmed();
+                readings.pop_back();
+                qmap_chinese2pinyin[key] = readings;
+                /*
+                 //std map string list
+                 list<string> tmpList;
+                 for(int i = 0; i < buf.size() - 1; ++i){
+                    tmpList.push_back(buf[i]);
+                 }
+                 map[buf[buf.size() - 1]] = tmpList;
+                */
+            }
+        }
+    }
+
+private:
+    // Polyphonic word -> list of its readings.
+    QMap<QString, QStringList> qmap_chinese2pinyin;
+    //map<string, list<string>> map_chinese2pinyin;
+    size_t total_dict_size_ = 0;
+    DatTrie dat_;
+};
+}
+
--- a/libchinese-segmentation/cppjieba/PosTagger.hpp
+++ b/libchinese-segmentation/cppjieba/PosTagger.hpp
@ -0,0 +1,84 @@
+#pragma once
+
+#include "limonp/StringUtil.hpp"
+#include "segment-trie/segment-trie.h"
+//#include "DictTrie.hpp"
+//#include "SegmentTagged.hpp"
+
+namespace cppjieba {
+using namespace limonp;
+
+// Fallback tags returned by PosTagger::SpecialRule when the dictionary has no tag:
+// POS_M   - every ASCII rune counted was a digit
+// POS_ENG - the counted ASCII runes include a non-digit
+// POS_X   - no ASCII rune was counted (also returned when decoding the word fails)
+static const char* const POS_M = "m";
+static const char* const POS_ENG = "eng";
+static const char* const POS_X = "x";
+
+/**
+ * Part-of-speech tagger: tags come from the segmenter's dictionary, with a character-class
+ * heuristic as the fallback for words that have no dictionary tag.
+ */
+class PosTagger {
+public:
+    PosTagger() = default;
+    ~PosTagger() = default;
+
+    /**
+     * Cuts src with segment and appends one (word, tag) pair per word to res.
+     * @return true when res is non-empty afterwards
+     */
+    bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
+        vector<string> pieces;
+        segment.CutToStr(src, pieces);
+
+        for (const string& piece : pieces) {
+            res.push_back(make_pair(piece, LookupTag(piece, segment)));
+        }
+
+        return !res.empty();
+    }
+
+    /**
+     * Returns the dictionary tag of str, or the SpecialRule fallback when the word is
+     * missing from the dictionary or its tag is empty.
+     */
+    string LookupTag(const string &str, const SegmentTagged& segment) const {
+        const DictTrie * dict = segment.GetDictTrie();
+        assert(dict != nullptr);
+        const auto entry = dict->Find(str);
+
+        // Happy path: the dictionary knows the word and has a tag for it.
+        if (entry != nullptr && !entry->GetTag().empty()) {
+            return entry->GetTag();
+        }
+
+        RuneStrArray runes;
+
+        if (!DecodeRunesInString(str, runes)) {
+            XLOG(ERROR) << "Decode failed.";
+            return POS_X;
+        }
+
+        return SpecialRule(runes);
+    }
+
+private:
+    /**
+     * Fallback classification by counting ASCII runes and ASCII digits.
+     * Scanning stops once the ASCII count reaches half of the rune count.
+     */
+    const char* SpecialRule(const RuneStrArray& unicode) const {
+        const size_t total = unicode.size();
+        const size_t asciiLimit = total / 2;
+        size_t digitCount = 0;
+        size_t asciiCount = 0;
+
+        for (size_t idx = 0; idx < total && asciiCount < asciiLimit; ++idx) {
+            const auto rune = unicode[idx].rune;
+
+            if (rune >= 0x80) {
+                continue;
+            }
+
+            ++asciiCount;
+
+            if (rune >= '0' && rune <= '9') {
+                ++digitCount;
+            }
+        }
+
+        // ascii char is not found
+        if (asciiCount == 0) {
+            return POS_X;
+        }
+
+        // all the ascii is number char, otherwise the ascii chars contain english letter
+        return (digitCount == asciiCount) ? POS_M : POS_ENG;
+    }
+
+}; // class PosTagger
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@ -0,0 +1,127 @@
+#pragma once
+
+#include "limonp/Logging.hpp"
+#include <unordered_set>
+#include "Unicode.hpp"
+
+namespace cppjieba {
+
+/// Decodes a UTF-8 sentence once and hands out successive rune ranges that are
+/// delimited by the space character (0x20).
+/// The `symbols` set is stored, but the methods below only test for 0x20.
+class PreFilter {
+public:
+    /// Decodes `sentence` and places the cursor on its first rune.
+    /// A decode failure is logged only.
+    PreFilter(const std::unordered_set<Rune>& symbols,
+              const string& sentence)
+        : symbols_(symbols) {
+        if (!DecodeRunesInString(sentence, sentence_)) {
+            XLOG(ERROR) << "decode failed. "<<sentence;
+        }
+
+        cursor_ = sentence_.begin();
+    }
+    ~PreFilter() {
+    }
+    /// True while the cursor has not reached the end of the decoded sentence.
+    bool HasNext() const {
+        return cursor_ != sentence_.end();
+    }
+    /// Produces the next range: `left` is the cursor on entry (leading spaces are
+    /// part of the range), `right` is the first space after the first non-space
+    /// rune, or the end of the sentence. Returns false only when the cursor was
+    /// already at the end.
+    bool Next(WordRange& wordRange) {
+
+        if (cursor_ == sentence_.end()) {
+            return false;
+        }
+
+        wordRange.left = cursor_;
+
+        // Skip leading spaces. The end check has to come first so that the end
+        // iterator is never dereferenced.
+        while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
+            cursor_++;
+        }
+
+        if (cursor_ == sentence_.end()) {
+            wordRange.right = cursor_;
+            return true;
+        }
+
+        while (++cursor_ != sentence_.end()) {
+            if (cursor_->rune == 0x20) {
+                wordRange.right = cursor_;
+                return true;
+            }
+        }
+
+        wordRange.right = sentence_.end();
+        return true;
+    }
+
+    /// Variant that reports runs of spaces separately.
+    /// - Cursor on a space: advances to the first non-space rune, sets
+    ///   `isNull = true` and returns that run; returns false if only spaces remain.
+    /// - Otherwise: the range ends at the next space, after 1024 runes, or at the
+    ///   first rune whose UTF-8 byte length differs from the first rune's.
+    /// Returns false when the cursor was already at the end.
+    bool Next(WordRange& wordRange, bool& isNull) {
+        isNull = false;
+        if (cursor_ == sentence_.end()) {
+            return false;
+        }
+
+        wordRange.left = cursor_;
+        if (cursor_->rune == 0x20) {
+            while (cursor_ != sentence_.end()) {
+                if (cursor_->rune != 0x20) {
+                    if (wordRange.left == cursor_) {
+                        cursor_ ++;
+                    }
+                    wordRange.right = cursor_;
+                    isNull = true;
+                    return true;
+                }
+                cursor_ ++;
+            }
+            return false;
+        }
+
+        int max_num = 0;
+        uint32_t utf8_num = cursor_->len;
+
+        while (cursor_ != sentence_.end()) {
+            if (cursor_->rune == 0x20) {
+                if (wordRange.left == cursor_) {
+                    cursor_ ++;
+                }
+
+                wordRange.right = cursor_;
+                return true;
+            }
+
+            cursor_ ++;
+            max_num++;
+            // TODO: guards against being handed too much text at once; the limit is
+            // provisionally 1024 runes. The cursor may now sit on end(), so it is
+            // only dereferenced after checking for that.
+            if (max_num >= 1024 || (cursor_ != sentence_.end() && cursor_->len != utf8_num)) {
+                wordRange.right = cursor_;
+                return true;
+            }
+        }
+
+        wordRange.right = sentence_.end();
+        return true;
+    }
+
+    /// Returns the range from the cursor up to the next space (or the end).
+    /// When the cursor is on a space, the range is that single space.
+    WordRange Next() {
+        WordRange range(cursor_, cursor_);
+
+        while (cursor_ != sentence_.end()) {
+            //if (IsIn(symbols_, cursor_->rune)) {
+            if (cursor_->rune == 0x20) {
+                if (range.left == cursor_) {
+                    cursor_ ++;
+                }
+
+                range.right = cursor_;
+                return range;
+            }
+
+            cursor_ ++;
+        }
+
+        range.right = sentence_.end();
+        return range;
+    }
+private:
+    RuneStrArray::const_iterator cursor_;
+    RuneStrArray sentence_;
+    const std::unordered_set<Rune>& symbols_;
+}; // class PreFilter
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@ -0,0 +1,89 @@
+#pragma once
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "SegmentBase.hpp"
+#include "FullSegment.hpp"
+#include "MixSegment.hpp"
+#include "Unicode.hpp"
+
+namespace cppjieba {
+/// Query-mode segmenter: runs MixSegment first, then additionally emits the
+/// dictionary-known 2-rune and 3-rune sub-words of every longer word.
+class QuerySegment: public SegmentBase {
+public:
+    QuerySegment(const DictTrie* dictTrie,
+                 const HMMModel* model,
+                 const string& stopWordPath)
+        : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
+    }
+    ~QuerySegment() {
+    }
+
+    /// Appends to `res`, for each word produced by MixSegment: its dictionary
+    /// 2-grams, then its dictionary 3-grams, then the word itself.
+    /// The trailing size_t (max word length) is ignored.
+    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
+                     size_t) const override {
+        //use mix Cut first
+        vector<WordRange> mixRes;
+        mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
+
+        for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
+            AppendDictGrams(*mixResItr, 2, res);
+            AppendDictGrams(*mixResItr, 3, res);
+            res.push_back(*mixResItr);
+        }
+    }
+
+    /// Not implemented for query mode: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t) const override {
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
+    }
+    /// Not implemented for query mode: all arguments are ignored.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        std::ignore = s;
+        std::ignore = begin;
+        std::ignore = end;
+        std::ignore = res;
+        std::ignore = hmm;
+    }
+private:
+    /// When word.Length() exceeds `gram`, slides a window of `gram` runes over
+    /// `word` (for start positions i with i + gram - 1 < Length()) and appends the
+    /// windows found in the dictionary to `res`.
+    void AppendDictGrams(const WordRange& word, size_t gram, vector<WordRange>& res) const {
+        const size_t length = word.Length();
+
+        if (length <= gram) {
+            return;
+        }
+
+        for (size_t i = 0; i + gram - 1 < length; i++) {
+            // EncodeRunesToString takes a half-open range; WordRange stores the last rune.
+            const string text = EncodeRunesToString(word.left + i, word.left + i + gram);
+
+            if (trie_->Find(text) != nullptr) {
+                res.push_back(WordRange(word.left + i, word.left + i + gram - 1));
+            }
+        }
+    }
+
+    /// True when every rune in `s` is below 0x80.
+    bool IsAllAscii(const RuneArray& s) const {
+        for (size_t i = 0; i < s.size(); i++) {
+            if (s[i] >= 0x80) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+    MixSegment mixSeg_;
+    const DictTrie* trie_;
+}; // QuerySegment
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/SegmentBase.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp
@ -0,0 +1,94 @@
+#pragma once
+
+#include "limonp/Logging.hpp"
+#include "PreFilter.hpp"
+#include <cassert>
+
+
+namespace cppjieba {
+
+// Separator set installed by SegmentBase's constructor: space, tab, newline,
+// U+FF0C (bytes EF BC 8C, fullwidth comma) and U+3002 (bytes E3 80 82,
+// ideographic full stop).
+const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
+
+using namespace limonp;
+
+/// Common base of all segmenters: owns the separator set and provides the
+/// sentence-level entry points that pre-split the input with PreFilter and
+/// delegate each piece to the virtual Cut / CutWithSentence methods.
+class SegmentBase {
+public:
+    SegmentBase() {
+        XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
+    }
+    virtual ~SegmentBase() { }
+
+    /// Cuts the rune range [begin, end) and appends the resulting word ranges to `res`.
+    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
+                     size_t max_word_len) const = 0;
+    // Sentence-based cut methods, added to reduce intermediate storage and
+    // format conversions. -- jxx20210517
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
+                     size_t max_word_len) const = 0;
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t max_word_len) const = 0;
+    // CutToStr was rewritten to produce vector<string> directly and lower
+    // memory usage. -- jxx20210517
+    /// Clears `words`, then cuts every range PreFilter yields for `sentence` into it.
+    void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
+                  size_t max_word_len = MAX_WORD_LENGTH) const {
+        PreFilter pre_filter(symbols_, sentence);
+        words.clear();
+        words.reserve(sentence.size() / 2);// TODO: factor taken from the upstream source, still to be tuned
+        // Value-initialized placeholder: copying a default-initialized iterator
+        // would read an indeterminate value. PreFilter::Next overwrites both ends.
+        const RuneStrArray::const_iterator unset = RuneStrArray::const_iterator();
+        WordRange range(unset, unset);
+        while (pre_filter.Next(range)) {
+            CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
+        }
+    }
+    /// Cuts one caller-supplied range of `sentence` into `words` (not cleared here).
+    void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
+                  size_t max_word_len = MAX_WORD_LENGTH) const {
+        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
+    }
+    /// Same as above, collecting into a keyword map instead of a vector.
+    void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
+                  size_t max_word_len = MAX_WORD_LENGTH) const {
+        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
+    }
+    /// Cuts `sentence` into Word objects; `words` is cleared before being filled.
+    void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
+                   size_t max_word_len = MAX_WORD_LENGTH) const {
+        PreFilter pre_filter(symbols_, sentence);
+        vector<WordRange> wrs;
+        wrs.reserve(sentence.size() / 2);
+
+        while (pre_filter.HasNext()) {
+            auto range = pre_filter.Next();
+            Cut(range.left, range.right, wrs, hmm, max_word_len);
+        }
+
+        words.clear();
+        words.reserve(wrs.size());
+        GetWordsFromWordRanges(sentence, wrs, words);
+        // `wrs` is released by its destructor on return.
+    }
+
+    /// Thin forwarding wrapper around Cut() that supplies the default arguments.
+    void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
+                      bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
+        Cut(begin, end, res, hmm, max_word_len);
+    }
+
+    /// Replaces the separator set with the runes of `s`.
+    /// Returns false (after logging) when `s` cannot be decoded or contains a
+    /// duplicate rune; in the duplicate case the runes inserted so far remain.
+    bool ResetSeparators(const string& s) {
+        symbols_.clear();
+        RuneStrArray runes;
+
+        if (!DecodeRunesInString(s, runes)) {
+            XLOG(ERROR) << "decode " << s << " failed";
+            return false;
+        }
+
+        for (size_t i = 0; i < runes.size(); i++) {
+            if (!symbols_.insert(runes[i].rune).second) {
+                XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
+                return false;
+            }
+        }
+
+        return true;
+    }
+protected:
+    unordered_set<Rune> symbols_;
+}; // class SegmentBase
+
+} // cppjieba
+
--- a/libchinese-segmentation/cppjieba/SegmentTagged.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentTagged.hpp
@ -0,0 +1,21 @@
+#pragma once
+
+#include "SegmentBase.hpp"
+
+namespace cppjieba {
+
+/// Abstract segmenter that can also tag its output and expose its dictionary.
+class SegmentTagged : public SegmentBase {
+public:
+    SegmentTagged() : SegmentBase() {}
+    virtual ~SegmentTagged() {}
+
+    /// Fills `res` with string pairs for `src`.
+    /// NOTE(review): presumably (word, tag) pairs, as PosTagger::Tag produces — confirm against implementers.
+    virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
+
+    /// Returns the dictionary trie; PosTagger::LookupTag asserts it is non-null.
+    virtual const DictTrie* GetDictTrie() const = 0;
+
+}; // class SegmentTagged
+
+} // cppjieba
+
--- a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
@ -0,0 +1,205 @@
+
+#include <cmath>
+#include "Jieba.hpp"
+
+namespace cppjieba {
+using namespace limonp;
+using namespace std;
+
+/// Keyword extractor: builds a co-occurrence graph over the segmented words of
+/// a sentence, runs an iterative ranking over it and returns the top-N words.
+class TextRankExtractor {
+public:
+    /// One extracted keyword: the text, the byte offsets of its occurrences in
+    /// the sentence, and its rank weight.
+    typedef struct _Word {
+        string word;
+        vector<size_t> offsets;
+        double weight;
+    }    Word; // struct Word
+private:
+    typedef std::map<string, Word> WordMap;
+
+    /// Undirected weighted graph keyed by word text.
+    class WordGraph {
+    private:
+        typedef double Score;
+        typedef string Node;
+        typedef std::set<Node> NodeSet;
+
+        typedef std::map<Node, double> Edges;
+        typedef std::map<Node, Edges> Graph;
+
+        double d;       // damping factor used by rank()
+        Graph graph;
+        NodeSet nodeSet;
+    public:
+        WordGraph(): d(0.85) {}
+        WordGraph(double in_d): d(in_d) {}
+
+        /// Adds `weight` to the edge between `start` and `end`, in both directions.
+        void addEdge(const Node& start, const Node& end, double weight) {
+            nodeSet.insert(start);
+            nodeSet.insert(end);
+            graph[start][end] += weight;
+            graph[end][start] += weight;
+        }
+
+        /// Runs `rankTime` ranking passes and writes the normalized weights into `ws`.
+        /// Leaves `ws` untouched when the graph has no node.
+        void rank(WordMap &ws, size_t rankTime = 10) {
+            if (graph.empty()) {
+                return;
+            }
+
+            WordMap outSum;
+            const Score wsdef = 1.0 / graph.size();
+
+            // edges->first: start node; edge->first: end node; edge->second: weight
+            for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
+                Word& node = ws[edges->first];
+                node.word = edges->first;
+                node.weight = wsdef;
+
+                double total = 0;
+
+                for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
+                    total += edge->second;
+                }
+
+                outSum[edges->first].weight = total;
+            }
+
+            for (size_t i = 0; i < rankTime; i++) {
+                for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); ++node) {
+                    const Edges& adjacent = graph[*node];
+                    double s = 0;
+
+                    for (Edges::const_iterator edge = adjacent.begin(); edge != adjacent.end(); ++edge) {
+                        s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
+                    }
+
+                    ws[*node].weight = (1 - d) + d * s;
+                }
+            }
+
+            Score min_rank = ws.begin()->second.weight;
+            Score max_rank = min_rank;
+
+            for (WordMap::iterator i = ws.begin(); i != ws.end(); ++i) {
+                if (i->second.weight < min_rank) {
+                    min_rank = i->second.weight;
+                }
+
+                if (i->second.weight > max_rank) {
+                    max_rank = i->second.weight;
+                }
+            }
+
+            // Rescale relative to a tenth of the minimum so the top word gets weight 1.
+            for (WordMap::iterator i = ws.begin(); i != ws.end(); ++i) {
+                i->second.weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
+            }
+        }
+    };
+
+public:
+    TextRankExtractor(const DictTrie* dictTrie,
+                      const HMMModel* model,
+                      const string& stopWordPath)
+        : segment_(dictTrie, model) {
+        LoadStopWordDict(stopWordPath);
+    }
+    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
+        LoadStopWordDict(stopWordPath);
+    }
+    ~TextRankExtractor() {
+    }
+
+    /// Appends the texts of the top `topN` keywords to `keywords` (not cleared).
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+        vector<Word> topWords;
+        Extract(sentence, topWords, topN);
+
+        for (size_t i = 0; i < topWords.size(); i++) {
+            keywords.push_back(topWords[i].word);
+        }
+    }
+
+    /// Appends (text, weight) of the top `topN` keywords to `keywords` (not cleared).
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+        vector<Word> topWords;
+        Extract(sentence, topWords, topN);
+
+        for (size_t i = 0; i < topWords.size(); i++) {
+            keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+        }
+    }
+
+    /// Replaces `keywords` with the `topN` highest-weighted words of `sentence`.
+    /// Single-rune words and stop words are skipped. Each remaining word is linked
+    /// to the following candidates inside a window of `span` positions (skipped
+    /// words widen the window). If the cut words do not add up to the sentence
+    /// length, an error is logged and `keywords` is left untouched.
+    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
+        vector<string> words;
+        segment_.CutToStr(sentence, words);
+
+        TextRankExtractor::WordGraph graph;
+        WordMap wordmap;
+        size_t offset = 0;
+
+        for (size_t i = 0; i < words.size(); i++) {
+            size_t t = offset;
+            offset += words[i].size();
+
+            if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
+                continue;
+            }
+
+            for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
+                if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
+                    skip++;
+                    continue;
+                }
+
+                graph.addEdge(words[i], words[j], 1);
+            }
+
+            // rank() only names words that are graph nodes; set the text here so a
+            // candidate without any neighbour is not returned with an empty `word`.
+            Word& entry = wordmap[words[i]];
+            entry.word = words[i];
+            entry.offsets.push_back(t);
+        }
+
+        if (offset != sentence.size()) {
+            XLOG(ERROR) << "words illegal";
+            return;
+        }
+
+        graph.rank(wordmap, rankTime);
+
+        keywords.clear();
+        keywords.reserve(wordmap.size());
+
+        for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+            keywords.push_back(itr->second);
+        }
+
+        topN = min(topN, keywords.size());
+        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+        keywords.resize(topN);
+    }
+private:
+    /// Loads one stop word per line from `filePath`; the file must open and be non-empty.
+    void LoadStopWordDict(const string& filePath) {
+        ifstream ifs(filePath.c_str());
+        XCHECK(ifs.is_open()) << "open " << filePath << " failed";
+        string line ;
+
+        while (getline(ifs, line)) {
+            stopWords_.insert(line);
+        }
+
+        assert(stopWords_.size());
+    }
+
+    /// Orders keywords by descending weight.
+    static bool Compare(const Word &x, const Word &y) {
+        return x.weight > y.weight;
+    }
+
+    MixSegment segment_;
+    unordered_set<string> stopWords_;
+}; // class TextRankExtractor
+
+/// Streams a keyword as a JSON-like object with word, offset list and weight.
+inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
+    os << "{\"word\": \"" << word.word;
+    os << "\", \"offset\": " << word.offsets;
+    os << ", \"weight\": " << word.weight;
+    os << "}";
+    return os;
+}
+} // namespace cppjieba
+
+
+
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@ -0,0 +1,264 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+#include <ostream>
+#include "limonp/LocalVector.hpp"
+#include "limonp/StringUtil.hpp"
+#include "common-struct.h"
+
+namespace cppjieba {
+
+using std::string;
+using std::vector;
+
+typedef uint32_t Rune;
+
+/// Streams a Word as a JSON-like object with its text and byte offset.
+inline std::ostream& operator << (std::ostream& os, const Word& w) {
+    os << "{\"word\": \"" << w.word;
+    os << "\", \"offset\": " << w.offset;
+    os << "}";
+    return os;
+}
+
+/// Fixed-size dictionary payload: a weight plus a tag of at most 7 characters.
+struct DatMemElem {
+    double weight = 0.0;
+    char tag[8] = {};
+
+    /// Stores `str` as the tag, truncated to sizeof(tag) - 1 bytes and zero-padded.
+    void SetTag(const string & str) {
+        const size_t copy_len = std::min(str.size(), sizeof(tag) - 1);
+        memset(tag, 0, sizeof(tag));
+        strncpy(tag, str.c_str(), copy_len);
+    }
+
+    /// Returns the tag as a string (up to the first NUL byte).
+    string GetTag() const {
+        return string(tag);
+    }
+};
+
+// One entry of the segmentation DAG.
+// NOTE(review): `nexts` presumably lists (next position, dictionary element)
+// candidates reachable from this entry — confirm against DatTrie's usage.
+struct DatDag {
+    limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
+    //double max_weight;
+    //size_t max_next;
+};
+
+/// A decoded code point together with its location in the source string.
+struct RuneInfo {
+    Rune rune;                    // decoded code point
+    uint32_t offset;              // byte offset of the rune in the source string
+    uint32_t len;                 // number of bytes the rune occupies
+    uint32_t unicode_offset = 0;
+    uint32_t unicode_length = 0;
+
+    RuneInfo()
+        : rune(0), offset(0), len(0) {}
+
+    RuneInfo(Rune r, uint32_t o, uint32_t l)
+        : rune(r), offset(o), len(l) {}
+
+    RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t uni_off, uint32_t uni_len)
+        : rune(r), offset(o), len(l), unicode_offset(uni_off), unicode_length(uni_len) {}
+}; // struct RuneInfo
+
+/// Streams a RuneInfo as a JSON-like object with rune, byte offset and byte length.
+inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
+    os << "{\"rune\": \"" << r.rune;
+    os << "\", \"offset\": " << r.offset;
+    os << ", \"len\": " << r.len;
+    os << "}";
+    return os;
+}
+
+typedef limonp::LocalVector<Rune> RuneArray;
+typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
+
+// Pair of iterators into a RuneStrArray that delimits a word: [left, right].
+// IsAllAscii() treats `right` as inclusive, while Length() returns right - left.
+// NOTE(review): for an inclusive range that is one less than the number of
+// runes — confirm which convention the callers of Length() expect.
+struct WordRange {
+    RuneStrArray::const_iterator left;
+    RuneStrArray::const_iterator right;
+
+    WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
+        : left(l), right(r) {}
+
+    /// Distance between the two iterators.
+    size_t Length() const {
+        return right - left;
+    }
+
+    /// True when every rune from `left` through `right` (inclusive) is below 0x80.
+    bool IsAllAscii() const {
+        RuneStrArray::const_iterator pos = left;
+
+        while (pos <= right) {
+            if (pos->rune >= 0x80) {
+                return false;
+            }
+
+            ++pos;
+        }
+
+        return true;
+    }
+}; // struct WordRange
+
+
+/// Clears `arr`, then decodes the UTF-8 string `s` into it via limonp.
+/// Returns limonp's success flag.
+inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
+    arr.clear();
+    const bool decoded = limonp::Utf8ToUnicode32(s, arr);
+    return decoded;
+}
+
+/// Convenience overload returning the decoded runes by value; the success flag
+/// of the decode is discarded.
+inline RuneArray DecodeRunesInString(const string& s) {
+    RuneArray decoded;
+    DecodeRunesInString(s, decoded);
+    return decoded;
+}
+
+// Decodes the UTF-8 string `s` into `runes`, recording for every code point its
+// byte offset and byte length in `s`.
+// Returns false on a lead byte above 0xf7 or on a sequence truncated by the end
+// of the string; `runes` then keeps the entries decoded so far (it is only
+// cleared on entry).
+// NOTE(review): continuation bytes are masked with 0x3f but never checked for
+// the 10xxxxxx pattern, and any non-ASCII lead byte <= 0xdf (including
+// 0x80..0xbf) is taken as a 2-byte lead.
+inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
+
+    uint32_t tmp;
+    uint32_t offset = 0;
+    runes.clear();
+    uint32_t len(0);
+    for (size_t i = 0; i < s.size();) {
+      if (!(s.data()[i] & 0x80)) { // 0xxxxxxx: single-byte (ASCII)
+        // 7 payload bits, 7 in total
+        tmp = (uint8_t)(s.data()[i]) & 0x7f;
+        i++;
+        len = 1;
+      } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxx: 2-byte sequence
+        // 5 payload bits from the lead byte
+        tmp = (uint8_t)(s.data()[i]) & 0x1f;
+
+        // + 6 bits, 11 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
+        i += 2;
+        len = 2;
+      } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxx: 3-byte sequence
+        // 4 payload bits from the lead byte
+        tmp = (uint8_t)(s.data()[i]) & 0x0f;
+
+        // + 6 bits, 10 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
+
+        // + 6 bits, 16 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
+
+        i += 3;
+        len = 3;
+      } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxx: 4-byte sequence
+        // 3 payload bits from the lead byte
+        tmp = (uint8_t)(s.data()[i]) & 0x07;
+
+        // + 6 bits, 9 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
+
+        // + 6 bits, 15 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
+
+        // + 6 bits, 21 in total
+        tmp <<= 6;
+        tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
+
+        i += 4;
+        len = 4;
+      } else {
+        // Unsupported lead byte, or not enough bytes left for the sequence.
+        return false;
+      }
+      // `i` has already been advanced past this rune, so unicode_offset receives
+      // the byte index of the NEXT rune; unicode_length is always 1.
+      // NOTE(review): confirm this is what consumers of unicode_offset expect
+      // (a rune index would be the other plausible meaning).
+      RuneInfo x(tmp, offset, len, i, 1);
+      runes.push_back(x);
+      offset += len;
+    }
+    return true;
+}
+
+/// Minimal iterator adaptor over RuneInfo pointers that dereferences to the rune
+/// value, so a RuneInfo range can be fed to limonp::Unicode32ToUtf8.
+/// NOTE(review): operator++(int) returns the already-advanced position rather
+/// than the previous one, unlike a conventional post-increment.
+class RunePtrWrapper {
+public:
+    const RuneInfo * m_ptr = nullptr;
+
+public:
+    explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
+
+    /// Rune value at the current position.
+    uint32_t operator *() {
+        return m_ptr->rune;
+    }
+
+    /// Advances by one element and returns a wrapper at the new position.
+    RunePtrWrapper operator ++(int) {
+        return RunePtrWrapper(++m_ptr);
+    }
+
+    bool operator !=(const RunePtrWrapper & b) const {
+        return !(m_ptr == b.m_ptr);
+    }
+};
+
+/// Encodes the runes in [begin, end) to UTF-8 and returns the resulting string.
+inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
+    RunePtrWrapper first(begin);
+    RunePtrWrapper last(end);
+    string encoded;
+    limonp::Unicode32ToUtf8(first, last, encoded);
+    return encoded;
+}
+
+/// Encodes the runes in [begin, end) to UTF-8 into the caller-supplied `str`.
+inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
+    RunePtrWrapper first(begin);
+    RunePtrWrapper last(end);
+    limonp::Unicode32ToUtf8(first, last, str);
+}
+
+/// Container stand-in that only counts pushed code points; used by Utf8CharNum
+/// to count characters without storing them.
+class Unicode32Counter {
+public :
+    size_t length = 0;
+
+    /// Resets the count to zero.
+    void clear() { length = 0; }
+
+    /// Counts one code point; the value itself is discarded.
+    void push_back(uint32_t) { length += 1; }
+};
+
+/// Returns the number of code points in the UTF-8 buffer, or 0 when limonp
+/// reports a decode failure.
+inline size_t Utf8CharNum(const char * str, size_t length) {
+    Unicode32Counter counter;
+    const bool ok = limonp::Utf8ToUnicode32(str, length, counter);
+    return ok ? counter.length : 0;
+}
+
+/// String overload: counts the code points of `str`.
+inline size_t Utf8CharNum(const string & str) {
+    const char * const bytes = str.data();
+    return Utf8CharNum(bytes, str.size());
+}
+
+/// True when `str` decodes to exactly one code point.
+inline bool IsSingleWord(const string& str) {
+    const size_t chars = Utf8CharNum(str);
+    return chars == 1;
+}
+
+
+// [left, right]
+/// Builds a Word from the inclusive rune range [left, right] of `s`: the byte
+/// span runs from left's offset through the last byte of `right`.
+inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
+    assert(right->offset >= left->offset);
+    const uint32_t byte_begin = left->offset;
+    const uint32_t byte_len = right->offset - byte_begin + right->len;
+    const uint32_t uni_begin = left->unicode_offset;
+    const uint32_t uni_len = right->unicode_offset - uni_begin + right->unicode_length;
+    return Word(s.substr(byte_begin, byte_len), byte_begin, uni_begin, uni_len);
+}
+
+/// Returns the bytes of `s` covered by the inclusive rune range [left, right].
+inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
+    assert(right->offset >= left->offset);
+    const uint32_t byte_len = right->offset - left->offset + right->len;
+    return s.substr(left->offset, byte_len);
+}
+
+/// Appends one Word per range in `wrs` to `words` (which is not cleared).
+inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
+    for (vector<WordRange>::const_iterator it = wrs.begin(); it != wrs.end(); ++it) {
+        words.push_back(GetWordFromRunes(s, it->left, it->right));
+    }
+}
+
+/// Appends the text of each range in `wrs` to `words` (which is not cleared).
+inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
+    for (vector<WordRange>::const_iterator it = wrs.begin(); it != wrs.end(); ++it) {
+        words.push_back(GetStringFromRunes(s, it->left, it->right));
+    }
+}
+
+/// Resizes `strs` to match `words` and copies each word's text into it.
+inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
+    const size_t count = words.size();
+    strs.resize(count);
+
+    size_t idx = 0;
+
+    while (idx < count) {
+        strs[idx] = words[idx].word;
+        ++idx;
+    }
+}
+
+const size_t MAX_WORD_LENGTH = 512;
+
+} // namespace cppjieba
+
--- a/libchinese-segmentation/cppjieba/cppjieba.pri
+++ b/libchinese-segmentation/cppjieba/cppjieba.pri
@ -0,0 +1,43 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/DictTrie.hpp \
+    $$PWD/IdfTrie.hpp \
+    $$PWD/PinYinTrie.hpp \
+    $$PWD/FullSegment.hpp \
+    $$PWD/HMMModel.hpp \
+    $$PWD/HMMSegment.hpp \
+    $$PWD/Jieba.hpp \
+    $$PWD/KeywordExtractor.hpp \
+    $$PWD/MPSegment.hpp \
+    $$PWD/MixSegment.hpp \
+    $$PWD/PosTagger.hpp \
+    $$PWD/PreFilter.hpp \
+    $$PWD/QuerySegment.hpp \
+    $$PWD/SegmentBase.hpp \
+    $$PWD/SegmentTagged.hpp \
+    $$PWD/TextRankExtractor.hpp \
+#    $$PWD/Trie.hpp \
+    $$PWD/Unicode.hpp \
+    $$PWD/DatTrie.hpp \
+    $$PWD/idf-trie/idf-trie.h \
+    $$PWD/segment-trie/segment-trie.h
+
+DISTFILES += \
+    dict/README.md \
+    dict/hmm_model.utf8 \
+    dict/idf.utf8 \
+    dict/jieba.dict.utf8 \
+    dict/pos_dict/char_state_tab.utf8 \
+    dict/pos_dict/prob_emit.utf8 \
+    dict/pos_dict/prob_start.utf8 \
+    dict/pos_dict/prob_trans.utf8 \
+    dict/stop_words.utf8 \
+    dict/user.dict.utf8
+    #dict/pinyinWithoutTone.txt \
+
+include(limonp/limonp.pri)
+
+SOURCES += \
+    $$PWD/idf-trie/idf-trie.cpp \
+    $$PWD/segment-trie/segment-trie.cpp
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include "idf-trie.h"
+
+// Builds the trie from several source files; the paths are forwarded to
+// StorageBase and Init() is run immediately.
+IdfTrie::IdfTrie(const vector<string> file_paths, string dat_cache_path)
+    : StorageBase<double, false, IdfCacheFileHeader>(file_paths, dat_cache_path)
+{
+    Init();
+}
+
+// Single-file convenience constructor: wraps `file_path` in a one-element
+// vector, forwards it to StorageBase and runs Init().
+IdfTrie::IdfTrie(string file_path, string dat_cache_path)
+    : StorageBase<double, false, IdfCacheFileHeader>(vector<string>{file_path}, dat_cache_path)
+{
+    Init();
+}
+
+// Rebuilds the on-disk cache: parses the IDF dictionary, feeds every word into
+// the trie, and writes header + IDF values + trie array to a temporary file
+// that is finally renamed to `dat_cache_file`.
+// `md5` must be exactly sizeof(header.md5_hex) bytes long (asserted).
+// NOTE(review): the dictionary is always read from IDF_DICT_PATH, not from the
+// paths given to the constructor — confirm this is intended.
+// NOTE(review): umask() changes the process-wide mask and is not restored.
+void IdfTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    IdfCacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    double idf_sum(0), idf_average(0), tmp(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    umask(S_IWGRP | S_IWOTH);
+    // mkstemp rewrites the XXXXXX suffix in place, so it needs a mutable buffer.
+    const int fd = mkstemp(&tmp_filepath[0]);
+    assert(fd >= 0);
+    fchmod(fd, 0644);
+
+    // Placeholder header; its numeric fields are patched once the totals are known.
+    write_bytes = write(fd, (const char *)&header, sizeof(IdfCacheFileHeader));
+
+    ifstream ifs(IDF_DICT_PATH);
+    string line;
+    vector<string> buf;
+
+    while (getline(ifs, line)) {
+        // Skip comments and blank lines.
+        if (limonp::StartsWith(line, "#") || line.empty()) {
+            continue;
+        }
+        limonp::Split(line, buf, " ");
+        // Expect exactly "<word> <idf>".
+        if (buf.size() != 2)
+            continue;
+        this->Update(buf[0].c_str(), buf[0].size(), elements_num);
+        offset += sizeof(double);
+        elements_num++;
+        tmp = atof(buf[1].c_str());
+        write_bytes += write(fd, &tmp, sizeof(double));
+        idf_sum += tmp;
+    }
+    // An empty or unreadable dictionary would otherwise divide by zero and
+    // store NaN in the cache header.
+    idf_average = elements_num > 0 ? idf_sum / elements_num : 0.0;
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // Patch the header fields that follow md5_hex, in this order.
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
+    write(fd, &elements_num, sizeof(int));
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+    write(fd, &idf_average, sizeof(double));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(IdfCacheFileHeader) + offset + this->GetDataTrieTotalSize());
+
+    tryRename(tmp_filepath, dat_cache_file);
+}
+
+// Returns the IDF value stored for `key`, or -1 when the exact-match search
+// reports a negative index.
+double IdfTrie::Find(const string &key) const
+{
+    const int index = this->ExactMatchSearch(key.c_str(), key.size());
+
+    if (index >= 0) {
+        return this->GetElementPtr()[index];
+    }
+
+    return -1;
+}
+
+// Returns the average IDF recorded in the cache file header.
+double IdfTrie::GetIdfAverage() const
+{
+    const auto header = this->GetCacheFileHeaderPtr();
+    return header->idf_average;
+}
+
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef IdfTrie_H
+#define IdfTrie_H
+
+#include "storage-base.hpp"
+
+// Installed location of the IDF dictionary that IdfTrie::LoadSourceFile parses.
+const char * const  IDF_DICT_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
+
+// Cache file header for the IDF trie: the common header plus the average IDF,
+// which IdfTrie::LoadSourceFile computes and writes.
+struct IdfCacheFileHeader : CacheFileHeaderBase
+{
+    double idf_average = 0;
+};
+
+// Word -> IDF lookup table built on StorageBase, with double-valued elements.
+class IdfTrie : public StorageBase<double, false, IdfCacheFileHeader>
+{
+public:
+    // Both constructors forward their arguments to StorageBase and call Init().
+    IdfTrie(const vector<string> file_paths, string dat_cache_path);
+    IdfTrie(string file_path, string dat_cache_path);
+    // Regenerates the cache file `dat_cache_file`, stamping it with `md5`.
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
+    // Returns the IDF stored for `key`, or -1 when the key is not found.
+    double Find(const string &key) const;
+    // Returns the average IDF stored in the cache header.
+    double GetIdfAverage() const;
+
+private:
+
+};
+
+#endif // IdfTrie_H
--- a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
@ -0,0 +1,70 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+
+#ifndef LIMONP_ARGV_FUNCTS_H
+#define LIMONP_ARGV_FUNCTS_H
+
+#include <set>
+#include <sstream>
+#include "StringUtil.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+// Splits a command line into positional arguments, "-key value" options and
+// bare "-flag" switches.
+class ArgvContext {
+ public :
+  // An argument starting with "-" followed by one that does not is stored as a
+  // key/value option (both are consumed); a "-" argument on its own is stored
+  // as a switch; everything else is positional.
+  ArgvContext(int argc, const char* const * argv) {
+    int i = 0;
+    while(i < argc) {
+      const char* const cur = argv[i];
+      if(!StartsWith(cur, "-")) {
+        args_.push_back(cur);
+        ++i;
+        continue;
+      }
+      const bool has_value = (i + 1 < argc) && !StartsWith(argv[i + 1], "-");
+      if(has_value) {
+        mpss_[cur] = argv[i + 1];
+        i += 2;
+      } else {
+        sset_.insert(cur);
+        ++i;
+      }
+    }
+  }
+  ~ArgvContext() {
+  }
+
+  friend ostream& operator << (ostream& os, const ArgvContext& args);
+  // i-th positional argument, or "" when out of range.
+  string operator [](size_t i) const {
+    return i < args_.size() ? args_[i] : string("");
+  }
+  // Value of option `key`, or "" when the option was not given with a value.
+  string operator [](const string& key) const {
+    const map<string, string>::const_iterator found = mpss_.find(key);
+    if(found == mpss_.end()) {
+      return "";
+    }
+    return found->second;
+  }
+
+  // True when `key` was given either as an option or as a bare switch.
+  bool HasKey(const string& key) const {
+    return mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end();
+  }
+
+ private:
+  vector<string> args_;
+  map<string, string> mpss_;
+  set<string> sset_;
+}; // class ArgvContext
+
+// Streams the positional arguments, the key/value options and the switches, in that order.
+inline ostream& operator << (ostream& os, const ArgvContext& args) {
+  os<<args.args_;
+  os<<args.mpss_;
+  os<<args.sset_;
+  return os;
+}
+
+} // namespace limonp
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
@ -0,0 +1,49 @@
+#ifndef LIMONP_BLOCKINGQUEUE_HPP
+#define LIMONP_BLOCKINGQUEUE_HPP
+
+#include <queue>
+#include "Condition.hpp"
+
+namespace limonp {
+template<class T>
+class BlockingQueue: NonCopyable { // Unbounded FIFO guarded by one MutexLock; Pop() blocks while the queue is empty.
+ public:
+  BlockingQueue()
+    : mutex_(), notEmpty_(mutex_), queue_() { // notEmpty_ is bound to mutex_, which is declared before it
+  }
+
+  void Push(const T& x) { // appends a copy of x and signals one waiter; there is no capacity limit to wait on
+    MutexLockGuard lock(mutex_);
+    queue_.push(x);
+    notEmpty_.Notify(); // Wait morphing saves us
+  }
+
+  T Pop() { // waits until an element exists, then removes the oldest one and returns it by value
+    MutexLockGuard lock(mutex_);
+    // always use a while-loop, due to spurious wakeup
+    while (queue_.empty()) {
+      notEmpty_.Wait();
+    }
+    assert(!queue_.empty());
+    T front(queue_.front()); // copy out first: pop() destroys the stored element
+    queue_.pop();
+    return front;
+  }
+
+  size_t Size() const { // element count read under the lock
+    MutexLockGuard lock(mutex_);
+    return queue_.size();
+  }
+  bool Empty() const {
+    return Size() == 0; // takes the lock through Size()
+  }
+
+ private:
+  mutable MutexLock mutex_; // mutable so the const accessors can lock it
+  Condition         notEmpty_; // signalled by Push(), waited on by Pop()
+  std::queue<T>     queue_;
+}; // class BlockingQueue
+
+} // namespace limonp
+
+#endif // LIMONP_BLOCKINGQUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
@ -0,0 +1,67 @@
+#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
+#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
+
+#include "BoundedQueue.hpp"
+
+namespace limonp {
+
+template<typename T>
+class BoundedBlockingQueue : NonCopyable { // Fixed-capacity FIFO: Push() blocks while full, Pop() blocks while empty.
+ public:
+  explicit BoundedBlockingQueue(size_t maxSize)
+    : mutex_(),
+      notEmpty_(mutex_),
+      notFull_(mutex_), // both conditions share the single mutex_
+      queue_(maxSize) {
+  }
+
+  void Push(const T& x) { // waits for a free slot, stores a copy of x, then signals one waiting consumer
+    MutexLockGuard lock(mutex_);
+    while (queue_.Full()) { // loop, not if: the predicate is re-checked after every wakeup
+      notFull_.Wait();
+    }
+    assert(!queue_.Full());
+    queue_.Push(x);
+    notEmpty_.Notify();
+  }
+
+  T Pop() { // waits for an element, removes the oldest one, then signals one waiting producer
+    MutexLockGuard lock(mutex_);
+    while (queue_.Empty()) {
+      notEmpty_.Wait();
+    }
+    assert(!queue_.Empty());
+    T res = queue_.Pop();
+    notFull_.Notify();
+    return res;
+  }
+
+  bool Empty() const {
+    MutexLockGuard lock(mutex_);
+    return queue_.Empty();
+  }
+
+  bool Full() const {
+    MutexLockGuard lock(mutex_);
+    return queue_.Full();
+  }
+
+  size_t size() const { // current element count, read under the lock
+    MutexLockGuard lock(mutex_);
+    return queue_.Size(); // BoundedQueue spells it Size(); the former queue_.size() did not exist
+  }
+
+  size_t capacity() const { // fixed at construction, so it is read without locking
+    return queue_.Capacity(); // BoundedQueue spells it Capacity(); the former queue_.capacity() did not exist
+  }
+
+ private:
+  mutable MutexLock          mutex_; // mutable so the const accessors can lock it
+  Condition                  notEmpty_; // signalled by Push(), waited on by Pop()
+  Condition                  notFull_; // signalled by Pop(), waited on by Push()
+  BoundedQueue<T>  queue_;
+}; // class BoundedBlockingQueue
+
+} // namespace limonp
+
+#endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
@ -0,0 +1,65 @@
+#ifndef LIMONP_BOUNDED_QUEUE_HPP
+#define LIMONP_BOUNDED_QUEUE_HPP
+
+#include <vector>
+#include <fstream>
+#include <cassert>
+
+namespace limonp {
+using namespace std;
+template<class T>
+class BoundedQueue { // Fixed-capacity ring buffer over a vector<T>; it does no locking of its own.
+ public:
+  explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) { // the vector is sized once, here
+    head_ = 0;
+    tail_ = 0;
+    size_ = 0;
+    assert(capacity_); // a zero capacity is only caught when assertions are enabled
+  }
+  ~BoundedQueue() {
+  }
+
+  void Clear() { // resets the indices only; stored elements stay in the vector until overwritten
+    head_ = 0;
+    tail_ = 0;
+    size_ = 0;
+  }
+  bool Empty() const {
+    return !size_;
+  }
+  bool Full() const {
+    return capacity_ == size_;
+  }
+  size_t Size() const {
+    return size_;
+  }
+  size_t Capacity() const {
+    return capacity_;
+  }
+
+  void Push(const T& t) { // caller must ensure the queue is not full; only the assert checks it
+    assert(!Full());
+    circular_buffer_[tail_] = t; // copy-assigns into the slot at tail_
+    tail_ = (tail_ + 1) % capacity_; // wrap around at the end of the buffer
+    size_ ++;
+  }
+
+  T Pop() { // caller must ensure the queue is not empty; returns a copy of the oldest element
+    assert(!Empty());
+    size_t oldPos = head_;
+    head_ = (head_ + 1) % capacity_;
+    size_ --;
+    return circular_buffer_[oldPos]; // the slot itself is left as it was
+  }
+
+ private:
+  size_t head_; // index of the oldest element
+  size_t tail_; // index of the next slot to write
+  size_t size_; // number of stored elements
+  const size_t capacity_;
+  vector<T> circular_buffer_;
+
+}; // class BoundedQueue
+} // namespace limonp
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Closure.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Closure.hpp
@ -0,0 +1,206 @@
+#ifndef LIMONP_CLOSURE_HPP
+#define LIMONP_CLOSURE_HPP
+
+namespace limonp {
+
+class ClosureInterface { // Abstract "callable with bound arguments"; the classes below implement Run().
+ public:
+  virtual ~ClosureInterface() { // virtual so a closure can be deleted through this base type
+  }
+  virtual void Run() = 0; // invoke the bound function with the stored arguments
+};
+
+template <class Funct>
+class Closure0: public ClosureInterface { // Stores a function pointer taking no arguments; Run() calls (*fun_)().
+ public:
+  Closure0(Funct fun)
+    : fun_(fun) { // initialised rather than assigned, so Funct need not be default-constructible
+  }
+  virtual ~Closure0() {
+  }
+  virtual void Run() {
+    (*fun_)();
+  }
+ private:
+  Funct fun_;
+}; 
+
+template <class Funct, class Arg1>
+class Closure1: public ClosureInterface { // Stores a function pointer plus one argument; Run() calls (*fun_)(arg1_).
+ public:
+  Closure1(Funct fun, Arg1 arg1)
+    : fun_(fun), // initialiser list (declaration order) so reference/const Arg1 types compile
+      arg1_(arg1) { // if Arg1 is a reference type, the referent must outlive the closure
+  }
+  virtual ~Closure1() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+}; 
+
+template <class Funct, class Arg1, class Arg2>
+class Closure2: public ClosureInterface { // Stores a function pointer plus two arguments; Run() calls (*fun_)(arg1_, arg2_).
+ public:
+  Closure2(Funct fun, Arg1 arg1, Arg2 arg2)
+    : fun_(fun), // initialiser list (declaration order) so reference/const argument types compile
+      arg1_(arg1), // if an Arg type is a reference, the referent must outlive the closure
+      arg2_(arg2) {
+  }
+  virtual ~Closure2() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_, arg2_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+}; 
+
+template <class Funct, class Arg1, class Arg2, class Arg3>
+class Closure3: public ClosureInterface { // Stores a function pointer plus three arguments; Run() calls (*fun_)(arg1_, arg2_, arg3_).
+ public:
+  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
+    : fun_(fun), // initialiser list (declaration order) so reference/const argument types compile
+      arg1_(arg1), // if an Arg type is a reference, the referent must outlive the closure
+      arg2_(arg2),
+      arg3_(arg3) {
+  }
+  virtual ~Closure3() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_, arg2_, arg3_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+  Arg3 arg3_;
+}; 
+
+template <class Obj, class Funct> 
+class ObjClosure0: public ClosureInterface { // Stores an object pointer and a member-function pointer; Run() calls (p_->*fun_)().
+ public:
+  ObjClosure0(Obj* p, Funct fun)
+   : p_(p), // p is not owned: it is never deleted here and must stay valid until Run() has finished
+     fun_(fun) { // initialised rather than assigned, so Funct need not be default-constructible
+  }
+  virtual ~ObjClosure0() {
+  }
+  virtual void Run() {
+    (p_->*fun_)();
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+}; 
+
+template <class Obj, class Funct, class Arg1> 
+class ObjClosure1: public ClosureInterface { // Stores an object pointer, a member-function pointer and one argument.
+ public:
+  ObjClosure1(Obj* p, Funct fun, Arg1 arg1)
+   : p_(p), // p is not owned: it is never deleted here and must stay valid until Run() has finished
+     fun_(fun), // initialiser list (declaration order) so reference/const Arg1 types compile
+     arg1_(arg1) { // if Arg1 is a reference type, the referent must outlive the closure
+  }
+  virtual ~ObjClosure1() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+}; 
+
+template <class Obj, class Funct, class Arg1, class Arg2> 
+class ObjClosure2: public ClosureInterface { // Stores an object pointer, a member-function pointer and two arguments.
+ public:
+  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2)
+   : p_(p), // p is not owned: it is never deleted here and must stay valid until Run() has finished
+     fun_(fun), // initialiser list (declaration order) so reference/const argument types compile
+     arg1_(arg1), // if an Arg type is a reference, the referent must outlive the closure
+     arg2_(arg2) {
+  }
+  virtual ~ObjClosure2() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_, arg2_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+}; 
+template <class Obj, class Funct, class Arg1, class Arg2, class Arg3> 
+class ObjClosure3: public ClosureInterface { // Stores an object pointer, a member-function pointer and three arguments.
+ public:
+  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
+   : p_(p), // p is not owned: it is never deleted here and must stay valid until Run() has finished
+     fun_(fun), // initialiser list (declaration order) so reference/const argument types compile
+     arg1_(arg1), // if an Arg type is a reference, the referent must outlive the closure
+     arg2_(arg2),
+     arg3_(arg3) {
+  }
+  virtual ~ObjClosure3() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_, arg2_, arg3_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+  Arg3 arg3_;
+}; 
+
+template<class R>
+ClosureInterface* NewClosure(R (*fun)()) { // free function, no arguments
+  return new Closure0<R (*)()>(fun); // allocated with new and never freed in this header, so the receiver must delete it
+}
+
+template<class R, class Arg1>
+ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) { // free function, one bound argument
+  return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
+}
+
+template<class R, class Arg1, class Arg2>
+ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { // free function, two bound arguments
+  return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
+}
+
+template<class R, class Arg1, class Arg2, class Arg3>
+ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { // free function, three bound arguments
+  return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
+}
+
+template<class R, class Obj>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) { // non-const member function of *obj, no arguments
+  return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun); // obj is stored as a raw pointer and must still be valid when Run() is called
+}
+
+template<class R, class Obj, class Arg1>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) { // member function, one bound argument
+  return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
+}
+
+template<class R, class Obj, class Arg1, class Arg2>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) { // member function, two bound arguments
+  return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
+}
+
+template<class R, class Obj, class Arg1, class Arg2, class Arg3>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) { // member function, three bound arguments
+  return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
+}
+
+} // namespace limonp
+
+#endif // LIMONP_CLOSURE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Colors.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Colors.hpp
@ -0,0 +1,31 @@
+#ifndef LIMONP_COLOR_PRINT_HPP
+#define LIMONP_COLOR_PRINT_HPP
+
+#include <string>
+#include <stdarg.h>
+
+namespace limonp {
+
+using std::string;
+
+enum Color { // numeric value is the code ColorPrintln places in its "\033[0;%dm" escape sequence
+  BLACK = 30,
+  RED, // 31
+  GREEN, // 32
+  YELLOW, // 33
+  BLUE, // 34
+  PURPLE // 35
+}; // enum Color
+
+inline void ColorPrintln(enum Color color, const char * fmt, ...) { // inline, not static: one shared definition and no unused-function warning per including file
+  va_list ap;
+  printf("\033[0;%dm", color); // switch colour; printf/vprintf need <stdio.h>, which this header only gets transitively
+  va_start(ap, fmt);
+  vprintf(fmt, ap); // fmt and the variadic arguments follow printf conventions
+  va_end(ap);
+  printf("\033[0m\n"); // reset the colour and always end the line: without the '\n' later output can unexpectedly keep this colour
+}
+
+} // namespace limonp
+
+#endif // LIMONP_COLOR_PRINT_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Condition.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Condition.hpp
@ -0,0 +1,38 @@
+#ifndef LIMONP_CONDITION_HPP
+#define LIMONP_CONDITION_HPP
+
+#include "MutexLock.hpp"
+
+namespace limonp {
+
+class Condition : NonCopyable { // pthread condition variable tied to one MutexLock; every pthread call is wrapped in XCHECK.
+ public:
+  explicit Condition(MutexLock& mutex)
+    : mutex_(mutex) { // only a reference is kept, so the MutexLock must outlive this object
+    XCHECK(!pthread_cond_init(&pcond_, NULL)); // NULL selects the default condition attributes
+  }
+
+  ~Condition() {
+    XCHECK(!pthread_cond_destroy(&pcond_));
+  }
+
+  void Wait() { // callers in this library hold mutex_ (via MutexLockGuard) and re-check their predicate in a loop
+    XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
+  }
+
+  void Notify() { // pthread_cond_signal: wakes at least one waiter, if any
+    XCHECK(!pthread_cond_signal(&pcond_));
+  }
+
+  void NotifyAll() { // pthread_cond_broadcast: wakes every waiter
+    XCHECK(!pthread_cond_broadcast(&pcond_));
+  }
+
+ private:
+  MutexLock& mutex_;
+  pthread_cond_t pcond_;
+}; // class Condition
+
+} // namespace limonp
+
+#endif // LIMONP_CONDITION_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Config.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Config.hpp
@ -0,0 +1,103 @@
+/************************************
+ * file enc : utf8
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_CONFIG_H
+#define LIMONP_CONFIG_H
+
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <assert.h>
+#include "StringUtil.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+class Config { // Reads "key = value" lines from a text file into a string map when constructed.
+ public:
+  explicit Config(const string& filePath) {
+    LoadFile(filePath);
+  }
+
+  operator bool () const { // true when at least one entry was loaded; const so a const Config can be tested too
+    return !map_.empty();
+  }
+
+  string Get(const string& key, const string& defaultvalue) const { // stored value, or defaultvalue when key is absent
+    map<string, string>::const_iterator it = map_.find(key);
+    if(map_.end() != it) {
+      return it->second;
+    }
+    return defaultvalue;
+  }
+  int Get(const string& key, int defaultvalue) const { // integer form: defaultvalue when key is absent or its value is empty
+    string str = Get(key, "");
+    if("" == str) {
+      return defaultvalue;
+    }
+    return atoi(str.c_str()); // atoi reports no errors: non-numeric text yields 0
+  }
+  const char* operator [] (const char* key) const { // C-string lookup; NULL for a NULL or unknown key
+    if(NULL == key) {
+      return NULL;
+    }
+    map<string, string>::const_iterator it = map_.find(key);
+    if(map_.end() != it) {
+      return it->second.c_str(); // points into map_, so it is valid only while this Config is alive
+    }
+    return NULL;
+  }
+
+  string GetConfigInfo() const { // textual dump of the whole map
+    string res;
+    res << *this; // uses a string operator<< that is not declared in this header
+    return res;
+  }
+
+ private:
+  void LoadFile(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    assert(ifs); // with assertions disabled, an unreadable file simply leaves the map empty
+    string line;
+    vector<string> vecBuf;
+    size_t lineno = 0;
+    while(getline(ifs, line)) {
+      lineno ++; // counted but not reported anywhere yet
+      Trim(line);
+      if(line.empty() || StartsWith(line, "#")) { // skip blank lines and '#' comment lines
+        continue;
+      }
+      vecBuf.clear();
+      Split(line, vecBuf, "=");
+      if(2 != vecBuf.size()) { // presumably Split cuts at every '=', so a value containing '=' is rejected - TODO confirm
+        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
+        assert(false);
+        continue; // reached only when assertions are disabled: the bad line is skipped
+      }
+      string& key = vecBuf[0];
+      string& value = vecBuf[1];
+      Trim(key);
+      Trim(value);
+      if(!map_.insert(make_pair(key, value)).second) { // insert() refuses duplicates, so the first definition wins
+        fprintf(stderr, "key[%s] already exits.\n", key.c_str());
+        assert(false);
+        continue;
+      }
+    }
+    ifs.close();
+  }
+
+  friend ostream& operator << (ostream& os, const Config& config);
+
+  map<string, string> map_;
+}; // class Config
+
+inline ostream& operator << (ostream& os, const Config& config) { // streams the whole key/value map
+  return os << config.map_; // needs a map operator<< overload that is not declared in this header
+}
+
+} // namespace limonp
+
+#endif // LIMONP_CONFIG_H
--- a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
@ -0,0 +1,74 @@
+#ifndef LIMONP_FILELOCK_HPP
+#define LIMONP_FILELOCK_HPP
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string>
+#include <string.h>
+#include <assert.h>
+
+namespace limonp {
+
+using std::string;
+
+class FileLock {
+ public:
+  FileLock() : fd_(-1), ok_(true) {
+  }
+  ~FileLock() {
+    if(fd_ > 0) {
+      Close();
+    }
+  }
+  void Open(const string& fname) {
+    assert(fd_ == -1);
+    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+    if(fd_ < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  void Close() {
+    ::close(fd_);
+  }
+  void Lock() {
+    if(LockOrUnlock(fd_, true) < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  void UnLock() {
+    if(LockOrUnlock(fd_, false) < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  bool Ok() const {
+    return ok_;
+  }
+  string Error() const {
+    return err_;
+  }
+ private:
+  static int LockOrUnlock(int fd, bool lock) {
+    errno = 0;
+    struct flock f;
+    memset(&f, 0, sizeof(f));
+    f.l_type = (lock ? F_WRLCK : F_UNLCK);
+    f.l_whence = SEEK_SET;
+    f.l_start = 0;
+    f.l_len = 0;        // Lock/unlock entire file
+    return fcntl(fd, F_SETLK, &f);
+  }
+
+  int fd_;
+  bool ok_;
+  string err_;
+}; // class FileLock
+
+}// namespace limonp
+
+#endif // LIMONP_FILELOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
@ -0,0 +1,7 @@
+#ifndef LIMONP_FORCE_PUBLIC_H
+#define LIMONP_FORCE_PUBLIC_H
+
+#define private public
+#define protected public
+
+#endif // LIMONP_FORCE_PUBLIC_H
--- a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
@ -0,0 +1,142 @@
+#ifndef LIMONP_LOCAL_VECTOR_HPP
+#define LIMONP_LOCAL_VECTOR_HPP
+
+#include <iostream>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+namespace limonp {
+using namespace std;
+/*
+ * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
+ * LocalVector<T> is simple and not well-tested.
+ */
+const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
+template <class T>
+class LocalVector { // Vector with inline storage for LOCAL_VECTOR_BUFFER_SIZE elements; larger contents move to malloc'd memory.
+ public:
+  typedef const T* const_iterator ;
+  typedef T value_type;
+  typedef size_t size_type;
+ private:
+  T buffer_[LOCAL_VECTOR_BUFFER_SIZE]; // inline storage, used while ptr_ == buffer_
+  T * ptr_; // current storage: either buffer_ or a malloc'd block
+  size_t size_;
+  size_t capacity_;
+ public:
+  LocalVector() {
+    init_();
+  };
+  LocalVector(const LocalVector<T>& vec) {
+    init_(); // establish a valid empty state before delegating to operator=
+    *this = vec;
+  }
+  LocalVector(const_iterator  begin, const_iterator end) { // TODO: make it faster
+    init_();
+    while(begin != end) {
+      push_back(*begin++);
+    }
+  }
+  LocalVector(size_t size, const T& t) { // TODO: make it faster
+    init_();
+    while(size--) {
+      push_back(t);
+    }
+  }
+  ~LocalVector() {
+    if(ptr_ != buffer_) { // only heap storage is freed; no element destructors are run
+      free(ptr_);
+    }
+  };
+ public:
+  LocalVector<T>& operator = (const LocalVector<T>& vec) {
+      if(this == &vec){
+          return *this;
+      }
+    clear(); // frees any heap block and points ptr_ back at buffer_
+    size_ = vec.size();
+    capacity_ = vec.capacity();
+    if(vec.buffer_ == vec.ptr_) { // source is still using its inline buffer
+      memcpy(buffer_, vec.buffer_, sizeof(T) * size_); // raw byte copy: no T copy constructor or assignment is invoked
+      ptr_ = buffer_;
+    } else {
+      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
+      assert(ptr_); // allocation failure is only caught when assertions are enabled
+      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
+    }
+    return *this;
+  }
+ private:
+  void init_() { // empty state backed by the inline buffer; does not free anything
+    ptr_ = buffer_;
+    size_ = 0;
+    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
+  }
+ public:
+  T& operator [] (size_t i) { // no bounds check
+    return ptr_[i];
+  }
+  const T& operator [] (size_t i) const { // no bounds check
+    return ptr_[i];
+  }
+  void push_back(const T& t) {
+    if(size_ == capacity_) {
+      assert(capacity_);
+      reserve(capacity_ * 2); // double the storage when full
+    }
+    ptr_[size_ ++ ] = t;
+  }
+  void reserve(size_t size) { // grows capacity to at least size; never shrinks
+    if(size <= capacity_) {
+      return;
+    }
+    T * next =  (T*)malloc(sizeof(T) * size);
+    assert(next); // allocation failure is only caught when assertions are enabled
+    T * old = ptr_;
+    ptr_ = next;
+    memcpy(ptr_, old, sizeof(T) * capacity_); // copies the whole old capacity, not just size_ elements
+    capacity_ = size;
+    if(old != buffer_) { // the inline buffer must never be passed to free()
+      free(old);
+    }
+  }
+  bool empty() const {
+    return 0 == size();
+  }
+  size_t size() const {
+    return size_;
+  }
+  size_t capacity() const {
+    return capacity_;
+  }
+  const_iterator begin() const {
+    return ptr_;
+  }
+  const_iterator end() const {
+    return ptr_ + size_;
+  }
+  void clear() { // drops all elements and any heap block, returning to the inline buffer
+    if(ptr_ != buffer_) {
+      free(ptr_);
+    }
+    init_();
+  }
+};
+
+template <class T>
+ostream & operator << (ostream& os, const LocalVector<T>& vec) { // prints ["a", "b", ...]; an empty vector prints []
+  if(vec.empty()) {
+    return os << "[]";
+  }
+  os<<"[\""<<vec[0]; // first element carries the opening bracket and quote
+  for(size_t i = 1; i < vec.size(); i++) {
+    os<<"\", \""<<vec[i]; // close the previous quote, add the separator, open the next quote
+  }
+  os<<"\"]";
+  return os;
+}
+
+}
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Logging.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Logging.hpp
@ -0,0 +1,77 @@
+#ifndef LIMONP_LOGGING_HPP
+#define LIMONP_LOGGING_HPP
+
+#include <sstream>
+#include <iostream>
+#include <cassert>
+#include <cstdlib>
+#include <ctime>
+
+#ifdef XLOG
+#error "XLOG has been defined already"
+#endif // XLOG
+#ifdef XCHECK
+#error "XCHECK has been defined already"
+#endif // XCHECK
+
+#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
+#define XCHECK(exp) if(!(exp)) XLOG(FATAL) << "exp: ["#exp << "] false. "
+
+namespace limonp {
+
+enum { // severity levels; XLOG(level) pastes the level name onto the LL_ prefix
+  LL_DEBUG = 0,
+  LL_INFO = 1,
+  LL_WARNING = 2,
+  LL_ERROR = 3,
+  LL_FATAL = 4, // the Logger destructor calls abort() for this level
+}; // enum
+
+static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"}; // printed name for each LL_* value, indexed by that value
+
+class Logger { // One log record: the constructor writes the prefix, callers append via Stream(), the destructor prints it to std::cerr.
+ public:
+  Logger(size_t level, const char* filename, int lineno)
+   : level_(level) {
+#ifdef LOGGING_LEVEL
+     if (level_ < LOGGING_LEVEL) { // below the compile-time threshold: no prefix is built
+       return;
+     }
+#endif
+    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY)); // strict bound: level_ indexes LOG_LEVEL_ARRAY below (was <=, off by one)
+    char buf[32];
+    time_t now;
+    time(&now);
+    struct tm result;
+    localtime_r(&now, &result); // re-entrant variant, writes into the caller-provided struct
+    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
+    stream_ << buf // prefix layout: "<timestamp> <file>:<line> <LEVEL> "
+      << " " << filename
+      << ":" << lineno
+      << " " << LOG_LEVEL_ARRAY[level_]
+      << " ";
+  }
+  ~Logger() {
+#ifdef LOGGING_LEVEL
+     if (level_ < LOGGING_LEVEL) { // suppressed records are dropped without output
+       return;
+     }
+#endif
+    std::cerr << stream_.str() << std::endl; // endl flushes, so the record is out before a possible abort()
+    if (level_ == LL_FATAL) {
+      abort(); // FATAL terminates the process; XCHECK relies on this
+    }
+  }
+
+  std::ostream& Stream() { // message text is appended here, e.g. XLOG(INFO) << "text"
+    return stream_;
+  }
+
+ private:
+  std::ostringstream stream_;
+  size_t level_;
+}; // class Logger
+
+} // namespace limonp
+
+#endif // LIMONP_LOGGING_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Md5.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Md5.hpp
@ -0,0 +1,415 @@
+/****************************************************************************
+**Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991
+**              2020, KylinSoft Co., Ltd.
+**All rights reserved.
+**
+**License to copy and use this software is granted provided that it
+**is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+**Algorithm" in all material mentioning or referencing this software
+**or this function.
+**
+**License is also granted to make and use derivative works provided
+**that such works are identified as "derived from the RSA Data
+**Security, Inc. MD5 Message-Digest Algorithm" in all material
+**mentioning or referencing the derived work.
+**
+**RSA Data Security, Inc. makes no representations concerning either
+**the merchantability of this software or the suitability of this
+**software for any particular purpose. It is provided "as is"
+**without express or implied warranty of any kind.
+**
+**These notices must be retained in any copies of any part of this
+**documentation and/or software.
+**
+**
+**
+**The original md5 implementation avoids external libraries.
+**This version has dependency on stdio.h for file input and
+**string.h for memcpy.
+**
+****************************************************************************/
+
+#ifndef __MD5_H__
+#define __MD5_H__
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+
+namespace limonp {
+
+//#pragma region MD5 defines
+// Constants for MD5Transform routine.
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+
+// F, G, H and I are basic MD5 functions.
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+
+// ROTATE_LEFT rotates x left n bits.
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+// Rotation is separate from addition to prevent recomputation.
+#define FF(a, b, c, d, x, s, ac) { \
+  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) { \
+  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) { \
+  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) { \
+  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+//#pragma endregion
+
+
+typedef unsigned char BYTE ;
+
+// POINTER defines a generic pointer type
+typedef unsigned char *POINTER;
+
+// UINT2 defines a two byte word
+typedef unsigned short int UINT2;
+
+// UINT4 defines a four byte word
+typedef unsigned int UINT4;
+
+static unsigned char PADDING[64] = {
+    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+// convenient object that wraps
+// the C-functions for use in C++ only
+class MD5 {
+private:
+    struct __context_t {
+        UINT4 state[4];                                   /* state (ABCD) */
+        UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
+        unsigned char buffer[64];                         /* input buffer */
+    } context ;
+
+    //#pragma region static helper functions
+    // The core of the MD5 algorithm is here.
+    // MD5 basic transformation. Transforms state based on block.
+    static void MD5Transform(UINT4 state[4], unsigned char block[64]) { // folds one 64-byte block into the four state words
+        UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+        Decode(x, block, 64); // unpack the block into sixteen 32-bit words, least significant byte first
+
+        /* Round 1 */
+        FF(a, b, c, d, x[ 0], S11, 0xd76aa478);  /* 1 */
+        FF(d, a, b, c, x[ 1], S12, 0xe8c7b756);  /* 2 */
+        FF(c, d, a, b, x[ 2], S13, 0x242070db);  /* 3 */
+        FF(b, c, d, a, x[ 3], S14, 0xc1bdceee);  /* 4 */
+        FF(a, b, c, d, x[ 4], S11, 0xf57c0faf);  /* 5 */
+        FF(d, a, b, c, x[ 5], S12, 0x4787c62a);  /* 6 */
+        FF(c, d, a, b, x[ 6], S13, 0xa8304613);  /* 7 */
+        FF(b, c, d, a, x[ 7], S14, 0xfd469501);  /* 8 */
+        FF(a, b, c, d, x[ 8], S11, 0x698098d8);  /* 9 */
+        FF(d, a, b, c, x[ 9], S12, 0x8b44f7af);  /* 10 */
+        FF(c, d, a, b, x[10], S13, 0xffff5bb1);  /* 11 */
+        FF(b, c, d, a, x[11], S14, 0x895cd7be);  /* 12 */
+        FF(a, b, c, d, x[12], S11, 0x6b901122);  /* 13 */
+        FF(d, a, b, c, x[13], S12, 0xfd987193);  /* 14 */
+        FF(c, d, a, b, x[14], S13, 0xa679438e);  /* 15 */
+        FF(b, c, d, a, x[15], S14, 0x49b40821);  /* 16 */
+
+        /* Round 2 */
+        GG(a, b, c, d, x[ 1], S21, 0xf61e2562);  /* 17 */
+        GG(d, a, b, c, x[ 6], S22, 0xc040b340);  /* 18 */
+        GG(c, d, a, b, x[11], S23, 0x265e5a51);  /* 19 */
+        GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa);  /* 20 */
+        GG(a, b, c, d, x[ 5], S21, 0xd62f105d);  /* 21 */
+        GG(d, a, b, c, x[10], S22,  0x2441453);  /* 22 */
+        GG(c, d, a, b, x[15], S23, 0xd8a1e681);  /* 23 */
+        GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8);  /* 24 */
+        GG(a, b, c, d, x[ 9], S21, 0x21e1cde6);  /* 25 */
+        GG(d, a, b, c, x[14], S22, 0xc33707d6);  /* 26 */
+        GG(c, d, a, b, x[ 3], S23, 0xf4d50d87);  /* 27 */
+        GG(b, c, d, a, x[ 8], S24, 0x455a14ed);  /* 28 */
+        GG(a, b, c, d, x[13], S21, 0xa9e3e905);  /* 29 */
+        GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8);  /* 30 */
+        GG(c, d, a, b, x[ 7], S23, 0x676f02d9);  /* 31 */
+        GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);  /* 32 */
+
+        /* Round 3 */
+        HH(a, b, c, d, x[ 5], S31, 0xfffa3942);  /* 33 */
+        HH(d, a, b, c, x[ 8], S32, 0x8771f681);  /* 34 */
+        HH(c, d, a, b, x[11], S33, 0x6d9d6122);  /* 35 */
+        HH(b, c, d, a, x[14], S34, 0xfde5380c);  /* 36 */
+        HH(a, b, c, d, x[ 1], S31, 0xa4beea44);  /* 37 */
+        HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9);  /* 38 */
+        HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60);  /* 39 */
+        HH(b, c, d, a, x[10], S34, 0xbebfbc70);  /* 40 */
+        HH(a, b, c, d, x[13], S31, 0x289b7ec6);  /* 41 */
+        HH(d, a, b, c, x[ 0], S32, 0xeaa127fa);  /* 42 */
+        HH(c, d, a, b, x[ 3], S33, 0xd4ef3085);  /* 43 */
+        HH(b, c, d, a, x[ 6], S34,  0x4881d05);  /* 44 */
+        HH(a, b, c, d, x[ 9], S31, 0xd9d4d039);  /* 45 */
+        HH(d, a, b, c, x[12], S32, 0xe6db99e5);  /* 46 */
+        HH(c, d, a, b, x[15], S33, 0x1fa27cf8);  /* 47 */
+        HH(b, c, d, a, x[ 2], S34, 0xc4ac5665);  /* 48 */
+
+        /* Round 4 */
+        II(a, b, c, d, x[ 0], S41, 0xf4292244);  /* 49 */
+        II(d, a, b, c, x[ 7], S42, 0x432aff97);  /* 50 */
+        II(c, d, a, b, x[14], S43, 0xab9423a7);  /* 51 */
+        II(b, c, d, a, x[ 5], S44, 0xfc93a039);  /* 52 */
+        II(a, b, c, d, x[12], S41, 0x655b59c3);  /* 53 */
+        II(d, a, b, c, x[ 3], S42, 0x8f0ccc92);  /* 54 */
+        II(c, d, a, b, x[10], S43, 0xffeff47d);  /* 55 */
+        II(b, c, d, a, x[ 1], S44, 0x85845dd1);  /* 56 */
+        II(a, b, c, d, x[ 8], S41, 0x6fa87e4f);  /* 57 */
+        II(d, a, b, c, x[15], S42, 0xfe2ce6e0);  /* 58 */
+        II(c, d, a, b, x[ 6], S43, 0xa3014314);  /* 59 */
+        II(b, c, d, a, x[13], S44, 0x4e0811a1);  /* 60 */
+        II(a, b, c, d, x[ 4], S41, 0xf7537e82);  /* 61 */
+        II(d, a, b, c, x[11], S42, 0xbd3af235);  /* 62 */
+        II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb);  /* 63 */
+        II(b, c, d, a, x[ 9], S44, 0xeb86d391);  /* 64 */
+
+        state[0] += a; // add this block's result onto the running state
+        state[1] += b;
+        state[2] += c;
+        state[3] += d;
+
+        // Zeroize sensitive information.
+        memset((POINTER)x, 0, sizeof(x));
+    }
+
+    // Encodes input (UINT4) into output (unsigned char). Assumes len is
+    // a multiple of 4.
+    static void Encode(unsigned char *output, UINT4 *input, unsigned int len) { // len counts output bytes, four per input word
+        unsigned int i, j;
+
+        for(i = 0, j = 0; j < len; i++, j += 4) { // i walks the words, j walks the bytes
+            output[j] = (unsigned char)(input[i] & 0xff); // least significant byte first
+            output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
+            output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
+            output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
+        }
+    }
+
+    // Decodes input (unsigned char) into output (UINT4). Assumes len is
+    // a multiple of 4.
+    static void Decode(UINT4 *output, unsigned char *input, unsigned int len) { // len counts input bytes, four per output word
+        unsigned int i, j;
+
+        for(i = 0, j = 0; j < len; i++, j += 4) // i walks the words, j walks the bytes
+            output[i] = ((UINT4)input[j]) | (((UINT4)input[j + 1]) << 8) | // input[j] is the least significant byte
+                        (((UINT4)input[j + 2]) << 16) | (((UINT4)input[j + 3]) << 24);
+    }
+    //#pragma endregion
+
+
+public:
+    // MAIN FUNCTIONS
+    MD5() { // ready for Update() right after construction; digestRaw/digestChars are not initialised here
+        Init() ;
+    }
+
+    // MD5 initialization. Begins an MD5 operation, writing a new context.
+    void Init() { // resets the bit count and state words; context.buffer is not cleared here
+        context.count[0] = context.count[1] = 0;
+
+        // Load magic initialization constants.
+        context.state[0] = 0x67452301;
+        context.state[1] = 0xefcdab89;
+        context.state[2] = 0x98badcfe;
+        context.state[3] = 0x10325476;
+    }
+
+    // MD5 block update operation. Continues an MD5 message-digest
+    // operation, processing another message block, and updating the
+    // context.
+    void Update(
+        unsigned char *input,   // input block
+        unsigned int inputLen) {  // length of input block
+        unsigned int i, index, partLen;
+
+        // Compute number of bytes mod 64
+        index = (unsigned int)((context.count[0] >> 3) & 0x3F); // bytes already waiting in context.buffer
+
+        // Update number of bits
+        if((context.count[0] += ((UINT4)inputLen << 3))
+                < ((UINT4)inputLen << 3)) // the low word wrapped around
+            context.count[1]++; // carry into the high word
+        context.count[1] += ((UINT4)inputLen >> 29); // bits of inputLen*8 that do not fit in the low word
+
+        partLen = 64 - index; // bytes needed to complete the buffered block
+
+        // Transform as many times as possible.
+        if(inputLen >= partLen) {
+            memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen); // top up the partial block
+            MD5Transform(context.state, context.buffer);
+
+            for(i = partLen; i + 63 < inputLen; i += 64) // remaining whole blocks are hashed straight from input
+                MD5Transform(context.state, &input[i]);
+
+            index = 0; // the buffer is empty again
+        } else
+            i = 0; // not enough for a block: everything is buffered below
+
+        /* Buffer remaining input */
+        memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen - i);
+    }
+
+    // MD5 finalization. Ends an MD5 message-digest operation, writing the
+    // the message digest and zeroizing the context.
+    // Writes to digestRaw
+    void Final() { // pads, appends the length, then fills digestRaw and digestChars; Init() must be called before reuse
+        unsigned char bits[8];
+        unsigned int index, padLen;
+
+        // Save number of bits
+        Encode(bits, context.count, 8); // captured now, because the padding Update() below changes the count
+
+        // Pad out to 56 mod 64.
+        index = (unsigned int)((context.count[0] >> 3) & 0x3f);
+        padLen = (index < 56) ? (56 - index) : (120 - index); // always at least one padding byte
+        Update(PADDING, padLen);
+
+        // Append length (before padding)
+        Update(bits, 8); // completes the final 64-byte block
+
+        // Store state in digest
+        Encode(digestRaw, context.state, 16);
+
+        // Zeroize sensitive information.
+        memset((POINTER)&context, 0, sizeof(context));
+
+        writeToString() ;
+    }
+
+    /// Buffer must be 32+1 (nul) = 33 chars long at least
+    void writeToString() { // renders digestRaw as 32 lowercase hex characters into digestChars
+        int pos ;
+
+        for(pos = 0 ; pos < 16 ; pos++)
+            sprintf(digestChars + (pos * 2), "%02x", digestRaw[pos]) ; // each call writes two digits plus NUL; the last NUL lands at index 32
+    }
+
+
+public:
+    // an MD5 digest is a 16-byte number (32 hex digits)
+    BYTE digestRaw[ 16 ] ;
+
+    // This version of the digest is actually
+    // a "printf'd" version of the digest.
+    char digestChars[ 33 ] ;
+
+    /// Load a file from disk and digest it
+    // Digests a file and returns the result.
+    const char* digestFile(const char *filename) { // returns digestChars, or NULL for a NULL/empty name or a file that cannot be opened
+        if(NULL == filename || strcmp(filename, "") == 0)
+            return NULL;
+
+        Init() ;
+
+        FILE *file;
+
+        unsigned char buffer[1024] ;
+
+        if((file = fopen(filename, "rb")) == NULL) {
+            return NULL;
+        }
+        int len;
+        while((len = fread(buffer, 1, 1024, file))) // stops when fread returns 0; end of file and read error are not told apart
+            Update(buffer, len) ;
+        Final();
+
+        fclose(file);
+
+        return digestChars ; // points into this object; overwritten by the next digest call
+    }
+
+    /// Digests a byte-array already in memory
+    const char* digestMemory(BYTE *memchunk, int len) { // returns digestChars, or NULL when memchunk is NULL
+        if(NULL == memchunk)
+            return NULL;
+
+        Init() ;
+        Update(memchunk, len) ; // len is converted to unsigned int by Update's signature
+        Final() ;
+
+        return digestChars ; // points into this object; overwritten by the next digest call
+    }
+
+    // Digests a string and prints the result.
+    const char* digestString(const char *string) { // returns digestChars (nothing is printed), or NULL when string is NULL
+        if(string == NULL)
+            return NULL;
+
+        Init() ;
+        Update((unsigned char*)string, strlen(string)) ; // hashes up to, not including, the terminating NUL
+        Final() ;
+
+        return digestChars ; // points into this object; overwritten by the next digest call
+    }
+};
+
+inline bool md5String(const char* str, std::string& res) { // res receives the hex digest of str; on failure res is "" and false is returned
+    if(NULL == str) {
+        res = "";
+        return false;
+    }
+
+    MD5 md5;
+    const char *pRes = md5.digestString(str);
+    if(NULL == pRes) {
+        res = "";
+        return false;
+    }
+
+    res = pRes; // copied before md5 (which owns the buffer) goes out of scope
+    return true;
+}
+
+inline bool md5File(const char* filepath, std::string& res) { // res receives the hex digest of the file; on failure res is "" and false is returned
+    if(NULL == filepath || strcmp(filepath, "") == 0) {
+        res = "";
+        return false;
+    }
+
+    MD5 md5;
+    const char *pRes = md5.digestFile(filepath); // NULL when the file cannot be opened
+
+    if(NULL == pRes) {
+        res = "";
+        return false;
+    }
+
+    res = pRes; // copied before md5 (which owns the buffer) goes out of scope
+    return true;
+}
+}
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
@ -0,0 +1,51 @@
+#ifndef LIMONP_MUTEX_LOCK_HPP
+#define LIMONP_MUTEX_LOCK_HPP
+
+#include <pthread.h>
+#include "NonCopyable.hpp"
+#include "Logging.hpp"
+
+namespace limonp {
+
+/// Owns a single pthread mutex for the lifetime of the object.
+///
+/// Lock()/Unlock() are private; the only code allowed to call them is the
+/// friend class MutexLockGuard. Every pthread call is wrapped in XCHECK
+/// (defined in Logging.hpp), which is handed the negated return code, i.e.
+/// the check passes when the pthread function returns 0.
+class MutexLock: NonCopyable {
+ public:
+  // Initializes the mutex with default attributes (NULL attribute pointer).
+  MutexLock() {
+    XCHECK(!pthread_mutex_init(&mutex_, NULL));
+  }
+  // Destroys the mutex.
+  ~MutexLock() {
+    XCHECK(!pthread_mutex_destroy(&mutex_));
+  }
+  // Exposes the raw mutex pointer to callers.
+  // NOTE(review): presumably for pthread_cond_wait in Condition.hpp -- confirm.
+  pthread_mutex_t* GetPthreadMutex() {
+    return &mutex_;
+  }
+
+ private:
+  void Lock() {
+    XCHECK(!pthread_mutex_lock(&mutex_));
+  }
+  void Unlock() {
+    XCHECK(!pthread_mutex_unlock(&mutex_));
+  }
+  friend class MutexLockGuard;
+
+  pthread_mutex_t mutex_;
+}; // class MutexLock
+
+/// Scoped lock: locks the given MutexLock in the constructor and unlocks it
+/// in the destructor. Holds the mutex by reference, so the MutexLock must
+/// outlive the guard.
+class MutexLockGuard: NonCopyable {
+ public:
+  explicit MutexLockGuard(MutexLock & mutex)
+    : mutex_(mutex) {
+    mutex_.Lock();
+  }
+  ~MutexLockGuard() {
+    mutex_.Unlock();
+  }
+ private:
+  MutexLock & mutex_;
+}; // class MutexLockGuard
+
+// Any function-style use `MutexLockGuard(x)` written after this point is
+// replaced by `XCHECK(false);`. A named guard such as
+// `MutexLockGuard lock(m);` is not affected because the macro only matches
+// when the name is immediately followed by '('.
+// NOTE(review): presumably meant to catch unnamed temporaries that would
+// unlock immediately -- confirm intent.
+#define MutexLockGuard(x) XCHECK(false);
+
+} // namespace limonp
+
+#endif // LIMONP_MUTEX_LOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
@ -0,0 +1,21 @@
+/************************************
+ ************************************/
+#ifndef LIMONP_NONCOPYABLE_H
+#define LIMONP_NONCOPYABLE_H
+
+namespace limonp {
+
+/// Base class that makes derived classes non-copyable.
+///
+/// The copy constructor and copy assignment operator are declared private
+/// and never defined here, so copies of a derived object are rejected.
+/// Construction and destruction are protected: the class is only usable as
+/// a base.
+class NonCopyable {
+ private:
+  NonCopyable(const NonCopyable& );
+  const NonCopyable& operator=(const NonCopyable& );
+ protected:
+  NonCopyable() {}
+  ~NonCopyable() {}
+}; // class NonCopyable
+
+} // namespace limonp
+
+#endif // LIMONP_NONCOPYABLE_H
--- a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
@ -0,0 +1,157 @@
+#ifndef LIMONP_STD_EXTEMSION_HPP
+#define LIMONP_STD_EXTEMSION_HPP
+
+#include <map>
+
+#ifdef __APPLE__
+#include <unordered_map>
+#include <unordered_set>
+#elif(__cplusplus >= 201103L)
+#include <unordered_map>
+#include <unordered_set>
+#elif defined _MSC_VER
+#include <unordered_map>
+#include <unordered_set>
+#else
+#include <tr1/unordered_map>
+#include <tr1/unordered_set>
+namespace std {
+using std::tr1::unordered_map;
+using std::tr1::unordered_set;
+}
+
+#endif
+
+#include <set>
+#include <string>
+#include <vector>
+#include <deque>
+#include <fstream>
+#include <sstream>
+
+namespace std {
+
+/// Streams a vector as "[a, b, c]"; an empty vector prints as "[]".
+template<typename T>
+ostream& operator << (ostream& os, const vector<T>& v) {
+  if(v.empty()) {
+    return os << "[]";
+  }
+  os << "[";
+  for(size_t idx = 0; idx < v.size(); ++idx) {
+    if(idx > 0) {
+      os << ", ";
+    }
+    os << v[idx];
+  }
+  return os << "]";
+}
+
+/// Specialization for vector<string>: every element is wrapped in double
+/// quotes, e.g. ["a", "b"]. An empty vector still prints as "[]".
+template<>
+inline ostream& operator << (ostream& os, const vector<string>& v) {
+  if(v.empty()) {
+    return os << "[]";
+  }
+  os << "[\"";
+  for(size_t idx = 0; idx < v.size(); ++idx) {
+    if(idx > 0) {
+      os << "\", \"";
+    }
+    os << v[idx];
+  }
+  return os << "\"]";
+}
+
+/// Streams a deque as ["a", "b"]; an empty deque prints as "[]".
+/// Note that elements are wrapped in double quotes for every element type,
+/// not only for strings.
+template<typename T>
+ostream& operator << (ostream& os, const deque<T>& dq) {
+  if(dq.empty()) {
+    return os << "[]";
+  }
+  os << "[\"";
+  for(size_t idx = 0; idx < dq.size(); ++idx) {
+    if(idx > 0) {
+      os << "\", \"";
+    }
+    os << dq[idx];
+  }
+  return os << "\"]";
+}
+
+
+/// Streams a pair as "first:second".
+template<class T1, class T2>
+ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
+  return os << pr.first << ":" << pr.second;
+}
+
+
+/// Formats `obj` through a stringstream and stores the text in `str`.
+/// The previous content of `str` is replaced, not appended to.
+/// @return a reference to `str`.
+template<class T>
+string& operator << (string& str, const T& obj) {
+  stringstream buffer;
+  buffer << obj; // dispatches to ostream& operator << (ostream&, const T&)
+  str = buffer.str();
+  return str;
+}
+
+/// Streams a map as "{k1:v1, k2:v2}" in the map's iteration order; an empty
+/// map prints as "{}". Each entry is written with the pair overload of
+/// operator << declared in this header.
+template<class T1, class T2>
+ostream& operator << (ostream& os, const map<T1, T2>& mp) {
+  if(mp.empty()) {
+    return os << "{}";
+  }
+  os << '{';
+  for(typename map<T1, T2>::const_iterator it = mp.begin(); it != mp.end(); ++it) {
+    if(it != mp.begin()) {
+      os << ", ";
+    }
+    os << *it;
+  }
+  return os << '}';
+}
+/// Streams an unordered_map as "{k1:v1, k2:v2}" in the container's iteration
+/// order; an empty map prints as "{}". Each entry is written with the pair
+/// overload of operator << declared in this header.
+template<class T1, class T2>
+ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
+  if(mp.empty()) {
+    return os << "{}";
+  }
+  os << '{';
+  for(typename std::unordered_map<T1, T2>::const_iterator it = mp.begin(); it != mp.end(); ++it) {
+    if(it != mp.begin()) {
+      os << ", ";
+    }
+    os << *it;
+  }
+  return os << '}';
+}
+
+/// Streams a set as "{a, b, c}" in sorted (iteration) order; an empty set
+/// prints as "{}".
+template<class T>
+ostream& operator << (ostream& os, const set<T>& st) {
+  if(st.empty()) {
+    return os << "{}";
+  }
+  os << '{';
+  for(typename set<T>::const_iterator it = st.begin(); it != st.end(); ++it) {
+    if(it != st.begin()) {
+      os << ", ";
+    }
+    os << *it;
+  }
+  return os << '}';
+}
+
+/// Returns true when `contain.find(key)` locates `key`.
+/// Works with any container exposing find()/end() and a const_iterator.
+template<class KeyType, class ContainType>
+bool IsIn(const ContainType& contain, const KeyType& key) {
+  typename ContainType::const_iterator pos = contain.find(key);
+  return pos != contain.end();
+}
+
+/// Replaces `s` with everything that can still be read from `ifs`.
+/// @return a reference to `s`.
+template<class T>
+basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
+  istreambuf_iterator<T> first(ifs);
+  istreambuf_iterator<T> last;
+  return s.assign(first, last);
+}
+
+/// Writes the characters of `s` to `ofs` through an ostreambuf_iterator,
+/// i.e. straight into the stream buffer without formatting.
+/// @return a reference to `ofs`.
+template<class T>
+ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
+  copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
+  return ofs;
+}
+
+} // namespace std
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
@ -0,0 +1,382 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_STR_FUNCTS_H
+#define LIMONP_STR_FUNCTS_H
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <sys/types.h>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <map>
+#include <functional>
+#include <locale>
+#include <sstream>
+#include <iterator>
+#include <algorithm>
+#include "StdExtension.hpp"
+
+namespace limonp {
+using namespace std;
+/// printf-style formatting into a std::string.
+///
+/// Starts with a 256-byte buffer and retries with a larger one until the
+/// output of vsnprintf() fits.
+///
+/// @param fmt printf format string, followed by its arguments.
+/// @return the formatted text. An empty string is returned if vsnprintf()
+///         keeps reporting an error (negative return) even after the
+///         buffer has grown past kMaxProbeSize.
+inline string StringFormat(const char* fmt, ...) {
+  // Upper bound for the "double and retry" path. A conforming vsnprintf()
+  // returns a negative value only on error, in which case retrying with a
+  // bigger buffer never succeeds; the cap keeps that from looping until
+  // `size` overflows or memory runs out.
+  const int kMaxProbeSize = 1 << 24;
+  int size = 256;
+  std::string str;
+  va_list ap;
+  while (1) {
+    str.resize(size);
+    va_start(ap, fmt);
+    // Write through &str[0]: the buffer returned by c_str() must not be
+    // modified.
+    int n = vsnprintf(&str[0], size, fmt, ap);
+    va_end(ap);
+    if (n > -1 && n < size) {
+      str.resize(n);
+      return str;
+    }
+    if (n > -1) {
+      // C99 behaviour: n is the length needed, excluding the terminator.
+      size = n + 1;
+    } else if (size < kMaxProbeSize) {
+      // Legacy behaviour: size unknown, so grow geometrically.
+      size *= 2;
+    } else {
+      str.clear();
+      return str;
+    }
+  }
+}
+
+/// Joins the elements of [begin, end) into `res`, separated by `connector`.
+/// Elements are formatted with operator << on a stringstream.
+/// When the range is empty, `res` is left untouched.
+template<class T>
+void Join(T begin, T end, string& res, const string& connector) {
+  if(begin != end) {
+    stringstream joined;
+    joined << *begin;
+    for(++begin; begin != end; ++begin) {
+      joined << connector << *begin;
+    }
+    res = joined.str();
+  }
+}
+
+/// Convenience overload returning the joined string (empty for an empty
+/// range).
+template<class T>
+string Join(T begin, T end, const string& connector) {
+  string joined;
+  Join(begin, end, joined, connector);
+  return joined;
+}
+
+/// Converts `str` to upper case in place, byte by byte, using toupper().
+/// @return a reference to `str`.
+inline string& Upper(string& str) {
+  for(size_t i = 0; i < str.size(); ++i) {
+    // toupper() is only defined for EOF and values representable as
+    // unsigned char; plain char may be negative (e.g. UTF-8 bytes), so
+    // convert first.
+    str[i] = static_cast<char>(toupper(static_cast<unsigned char>(str[i])));
+  }
+  return str;
+}
+
+/// Converts `str` to lower case in place, byte by byte, using tolower().
+/// @return a reference to `str`.
+inline string& Lower(string& str) {
+  for(size_t i = 0; i < str.size(); ++i) {
+    // tolower() is only defined for EOF and values representable as
+    // unsigned char; plain char may be negative (e.g. UTF-8 bytes), so
+    // convert first.
+    str[i] = static_cast<char>(tolower(static_cast<unsigned char>(str[i])));
+  }
+  return str;
+}
+
+/// Returns true when `c` is a whitespace character according to
+/// std::isspace(). Values above 0xff are never whitespace.
+inline bool IsSpace(unsigned c) {
+  // when passing large int as the argument of isspace, it core dump, so here need a type cast.
+  return c > 0xff ? false : std::isspace(c & 0xff);
+}
+
+/// Removes leading whitespace (as defined by IsSpace) from `s` in place.
+/// Implemented with a plain loop instead of std::ptr_fun/std::not1, which
+/// are no longer part of the standard library since C++17/C++20.
+/// @return a reference to `s`.
+inline std::string& LTrim(std::string &s) {
+  std::string::size_type first_kept = 0;
+  while(first_kept < s.size() && IsSpace(s[first_kept])) {
+    ++first_kept;
+  }
+  s.erase(0, first_kept);
+  return s;
+}
+
+/// Removes trailing whitespace (as defined by IsSpace) from `s` in place.
+/// @return a reference to `s`.
+inline std::string& RTrim(std::string &s) {
+  std::string::size_type kept = s.size();
+  while(kept > 0 && IsSpace(s[kept - 1])) {
+    --kept;
+  }
+  s.erase(kept);
+  return s;
+}
+
+/// Removes whitespace from both ends of `s` in place.
+/// @return a reference to `s`.
+inline std::string& Trim(std::string &s) {
+  return LTrim(RTrim(s));
+}
+
+/// Removes every leading occurrence of the character `x` from `s` in place.
+/// Implemented with a plain loop instead of std::bind2nd/std::not1, which
+/// are no longer part of the standard library since C++17/C++20.
+/// @return a reference to `s`.
+inline std::string& LTrim(std::string & s, char x) {
+  std::string::size_type first_kept = 0;
+  while(first_kept < s.size() && s[first_kept] == x) {
+    ++first_kept;
+  }
+  s.erase(0, first_kept);
+  return s;
+}
+
+/// Removes every trailing occurrence of the character `x` from `s` in place.
+/// @return a reference to `s`.
+inline std::string& RTrim(std::string & s, char x) {
+  std::string::size_type kept = s.size();
+  while(kept > 0 && s[kept - 1] == x) {
+    --kept;
+  }
+  s.erase(kept);
+  return s;
+}
+
+/// Removes the character `x` from both ends of `s` in place.
+/// @return a reference to `s`.
+inline std::string& Trim(std::string &s, char x) {
+  return LTrim(RTrim(s, x), x);
+}
+
+/// Splits `src` at any character contained in `pattern` and stores the
+/// pieces in `res` (which is cleared first).
+///
+/// - Adjacent delimiters produce empty pieces; a delimiter at the very end
+///   of `src` does not produce a trailing empty piece.
+/// - Once `maxsplit` pieces have been produced, the rest of `src` is
+///   appended as one final piece.
+inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
+  res.clear();
+  size_t pos = 0;
+  while(pos < src.size()) {
+    const size_t hit = src.find_first_of(pattern, pos);
+    const bool take_rest = (hit == string::npos) || (res.size() >= maxsplit);
+    if(take_rest) {
+      res.push_back(src.substr(pos));
+      break;
+    }
+    res.push_back(src.substr(pos, hit - pos));
+    pos = hit + 1;
+  }
+}
+
+/// Convenience overload returning the pieces by value.
+inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
+  vector<string> pieces;
+  Split(src, pieces, pattern, maxsplit);
+  return pieces;
+}
+
+/// Returns true when `str` begins with `prefix` (an empty prefix always
+/// matches).
+inline bool StartsWith(const string& str, const string& prefix) {
+  const size_t n = prefix.length();
+  return n <= str.length() && str.compare(0, n, prefix) == 0;
+}
+
+/// Returns true when `str` ends with `suffix` (an empty suffix always
+/// matches).
+inline bool EndsWith(const string& str, const string& suffix) {
+  const size_t n = suffix.length();
+  return n <= str.length() && str.compare(str.length() - n, n, suffix) == 0;
+}
+
+/// Returns true when the character `ch` occurs anywhere in `str`.
+inline bool IsInStr(const string& str, char ch) {
+  const size_t at = str.find(ch);
+  return at != string::npos;
+}
+
+/// Packs two bytes into a 16-bit value: `high` becomes bits 15..8 and `low`
+/// becomes bits 7..0. Both are treated as unsigned bytes.
+inline uint16_t TwocharToUint16(char high, char low) {
+  const uint16_t upper = static_cast<unsigned char>(high);
+  const uint16_t lower = static_cast<unsigned char>(low);
+  return static_cast<uint16_t>((upper << 8) | lower);
+}
+
+/// Decodes UTF-8 text into 16-bit code units appended to `vec`.
+///
+/// Only 1-, 2- and 3-byte sequences are handled; anything else (including
+/// 4-byte sequences and sequences cut off by `len`) makes the function
+/// return false. Continuation bytes are not validated: the sequence length
+/// is chosen from the first byte alone (<= 0xdf: two bytes, <= 0xef: three).
+///
+/// @param str input bytes; NULL returns false without touching `vec`.
+/// @param len number of bytes available at `str`.
+/// @param vec cleared, then filled. On a false return it holds the code
+///            units decoded before the failure.
+template <class Uint16Container>
+bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
+  if(!str) {
+    return false;
+  }
+  vec.clear();
+  size_t i = 0;
+  while(i < len) {
+    const uint8_t lead = (uint8_t)str[i];
+    if((lead & 0x80) == 0) { // 0xxxxxxx
+      vec.push_back(str[i]);
+      i += 1;
+    } else if(lead <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
+      const uint8_t b1 = (uint8_t)str[i + 1];
+      const uint16_t unit = (uint16_t)(((lead & 0x1f) << 6) | (b1 & 0x3f));
+      vec.push_back(unit);
+      i += 2;
+    } else if(lead <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
+      const uint8_t b1 = (uint8_t)str[i + 1];
+      const uint8_t b2 = (uint8_t)str[i + 2];
+      const uint16_t unit = (uint16_t)(((lead & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f));
+      vec.push_back(unit);
+      i += 3;
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Overload decoding the whole of `str`.
+template <class Uint16Container>
+bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
+  return Utf8ToUnicode(str.c_str(), str.size(), vec);
+}
+
+/// Decodes UTF-8 text into 32-bit code points appended to `vec`.
+///
+/// Handles 1- to 4-byte sequences. The sequence length is chosen from the
+/// first byte alone (<= 0xdf: two bytes, <= 0xef: three, <= 0xf7: four);
+/// continuation bytes are not validated. A first byte above 0xf7, or a
+/// sequence that would run past `size`, makes the function return false.
+///
+/// @param str  input bytes.
+/// @param size number of bytes available at `str`.
+/// @param vec  cleared, then filled. On a false return it holds the code
+///             points decoded before the failure.
+template <class Uint32Container>
+bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
+  vec.clear();
+  size_t i = 0;
+  while(i < size) {
+    const uint8_t lead = (uint8_t)str[i];
+    size_t trailing;  // number of continuation bytes that follow `lead`
+    uint32_t cp;      // payload bits collected so far
+    if((lead & 0x80) == 0) {   // 0xxxxxxx: 7 payload bits
+      trailing = 0;
+      cp = lead & 0x7f;
+    } else if(lead <= 0xdf) {  // 110xxxxx: 5 payload bits
+      trailing = 1;
+      cp = lead & 0x1f;
+    } else if(lead <= 0xef) {  // 1110xxxx: 4 payload bits
+      trailing = 2;
+      cp = lead & 0x0f;
+    } else if(lead <= 0xf7) {  // 11110xxx: 3 payload bits
+      trailing = 3;
+      cp = lead & 0x07;
+    } else {
+      return false;
+    }
+    // The last byte of the sequence must still lie inside the input.
+    if(i + trailing >= size) {
+      return false;
+    }
+    // Each continuation byte contributes its low 6 bits.
+    for(size_t k = 1; k <= trailing; ++k) {
+      cp <<= 6;
+      cp |= (uint8_t)(str[i + k]) & 0x3f;
+    }
+    vec.push_back(cp);
+    i += trailing + 1;
+  }
+  return true;
+}
+
+/// Overload decoding the whole of `str`.
+template <class Uint32Container>
+bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
+    return Utf8ToUnicode32(str.data(), str.size(), vec);
+}
+
+/// Returns how many bytes the UTF-8 encoding of code point `ui` occupies:
+/// 1 up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, 4 for everything above.
+inline int UnicodeToUtf8Bytes(uint32_t ui){
+    if(ui > 0xffff) {
+        return 4;
+    }
+    if(ui > 0x7ff) {
+        return 3;
+    }
+    return ui > 0x7f ? 2 : 1;
+}
+
+/// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
+/// (which is cleared first).
+///
+/// Code points up to 0x7f use one byte, up to 0x7ff two, up to 0xffff
+/// three, and everything above four bytes. No validation is performed
+/// (surrogates and values beyond U+10FFFF are encoded as-is).
+template <class Uint32ContainerConIter>
+void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
+  res.clear();
+  for(; begin != end; ++begin) {
+    const uint32_t ui = *begin;
+    if(ui <= 0x7f) {
+      res += char(ui);
+    } else if(ui <= 0x7ff) {
+      res += char(((ui >> 6) & 0x1f) | 0xc0);
+      res += char((ui & 0x3f) | 0x80);
+    } else if(ui <= 0xffff) {
+      res += char(((ui >> 12) & 0x0f) | 0xe0);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    } else {
+      // The 4-byte lead byte is 11110xxx and carries THREE payload bits
+      // (bits 20..18). Masking with 0x03 dropped bit 20, which encoded
+      // U+100000..U+10FFFF with lead byte 0xf0 instead of 0xf4.
+      res += char(((ui >> 18) & 0x07) | 0xf0);
+      res += char(((ui >> 12) & 0x3f) | 0x80);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    }
+  }
+}
+
+/// Encodes the 16-bit code units in [begin, end) as UTF-8 into `res`
+/// (which is cleared first). Values up to 0x7f use one byte, up to 0x7ff
+/// two bytes, everything else three bytes.
+template <class Uint16ContainerConIter>
+void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
+  res.clear();
+  for(; begin != end; begin++) {
+    const uint16_t unit = *begin;
+    if(unit <= 0x7f) {
+      res += char(unit);
+      continue;
+    }
+    if(unit <= 0x7ff) {
+      res += char(((unit >> 6) & 0x1f) | 0xc0);
+    } else {
+      res += char(((unit >> 12) & 0x0f) | 0xe0);
+      res += char(((unit >> 6) & 0x3f) | 0x80);
+    }
+    // Every multi-byte form ends with the low six bits.
+    res += char((unit & 0x3f) | 0x80);
+  }
+}
+
+
+/// Splits GBK-style bytes into 16-bit units appended to `vec`.
+///
+/// A byte with the high bit clear becomes one unit. A byte with the high
+/// bit set is combined with the following byte (first byte in bits 15..8);
+/// if no following byte exists the function returns false.
+///
+/// @param str input bytes; NULL yields an empty `vec` and a true result.
+/// @param len number of bytes available at `str`.
+/// @param vec cleared, then filled. On a false return it holds the units
+///            produced before the failure.
+template <class Uint16Container>
+bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
+  vec.clear();
+  if(!str) {
+    return true;
+  }
+  for(size_t i = 0; i < len; ) {
+    if((str[i] & 0x80) == 0) {
+      vec.push_back(uint16_t(str[i]));
+      i += 1;
+      continue;
+    }
+    if(i + 1 >= len) {
+      // Lead byte without its partner.
+      return false;
+    }
+    const uint16_t unit = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
+    vec.push_back(unit);
+    i += 2;
+  }
+  return true;
+}
+
+/// Overload processing the whole of `str`.
+template <class Uint16Container>
+bool GBKTrans(const string& str, Uint16Container& vec) {
+  return GBKTrans(str.c_str(), str.size(), vec);
+}
+
+/// Turns 16-bit units in [begin, end) back into bytes appended to `res`
+/// (which is cleared first). A unit whose upper byte has the high bit set
+/// is written as two bytes (upper, then lower); any other unit is written
+/// as its lower byte only.
+template <class Uint16ContainerConIter>
+void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
+  res.clear();
+  for(; begin != end; begin++) {
+    const char upper = ((*begin)>>8) & 0x00ff;
+    const char lower = (*begin) & 0x00ff;
+    if(upper & 0x80) {
+      res += upper;
+    }
+    res += lower;
+  }
+}
+
+/*
+ * format example: "%Y-%m-%d %H:%M:%S"
+ */
+// inline void GetTime(const string& format, string&  timeStr) {
+//   time_t timeNow;
+//   time(&timeNow);
+//   timeStr.resize(64);
+//   size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
+//   timeStr.resize(len);
+// }
+
+/// Concatenates two path fragments, inserting a "/" between them unless
+/// `path1` already ends with one. An empty `path1` yields "/" + path2.
+inline string PathJoin(const string& path1, const string& path2) {
+  return EndsWith(path1, "/") ? path1 + path2 : path1 + "/" + path2;
+}
+
+}
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Thread.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Thread.hpp
@ -0,0 +1,44 @@
+#ifndef LIMONP_THREAD_HPP
+#define LIMONP_THREAD_HPP
+
+#include "Logging.hpp"
+#include "NonCopyable.hpp"
+
+namespace limonp {
+
+/// Minimal pthread wrapper: derive from it, implement Run(), then call
+/// Start() to launch the thread and Join() to wait for it.
+///
+/// NOTE(review): this header uses pthread_* but does not include
+/// <pthread.h> itself; it presumably arrives through another include --
+/// confirm.
+class IThread: NonCopyable {
+ public:
+  IThread(): isStarted(false), isJoined(false) {
+  }
+  // A thread that was started but never joined is detached on destruction.
+  virtual ~IThread() {
+    if(isStarted && !isJoined) {
+      XCHECK(!pthread_detach(thread_));
+    }
+  };
+
+  // Body of the thread; invoked once from Worker() on the new thread.
+  virtual void Run() = 0;
+  // Creates the thread with default attributes. XCHECK guards against a
+  // second Start().
+  void Start() {
+    XCHECK(!isStarted);
+    XCHECK(!pthread_create(&thread_, NULL, Worker, this));
+    isStarted = true;
+  }
+  // Waits for the thread to finish. XCHECK guards against a second Join().
+  // NOTE(review): there is no check that Start() was called first, so
+  // joining a never-started thread passes an uninitialized thread_ to
+  // pthread_join.
+  void Join() {
+    XCHECK(!isJoined);
+    XCHECK(!pthread_join(thread_, NULL));
+    isJoined = true;
+  }
+ private:
+  // pthread entry point: `data` is the IThread* handed to pthread_create.
+  static void * Worker(void * data) {
+    IThread * ptr = (IThread* ) data;
+    ptr->Run();
+    return NULL;
+  }
+
+  pthread_t thread_;
+  bool isStarted;
+  bool isJoined;
+}; // class IThread
+
+} // namespace limonp
+
+#endif // LIMONP_THREAD_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
@ -0,0 +1,86 @@
+#ifndef LIMONP_THREAD_POOL_HPP
+#define LIMONP_THREAD_POOL_HPP
+
+#include "Thread.hpp"
+#include "BlockingQueue.hpp"
+#include "BoundedBlockingQueue.hpp"
+#include "Closure.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+//class ThreadPool;
+/// Fixed-size pool of worker threads that execute queued closures.
+///
+/// Usage: construct with the number of threads, call Start(), hand over
+/// heap-allocated closures with Add() (the pool deletes each closure after
+/// running it), and call Stop() -- or let the destructor do it -- to shut
+/// the workers down.
+///
+/// The task queue is bounded with a capacity equal to the thread count.
+class ThreadPool: NonCopyable {
+ public:
+  /// One pool thread: pops closures from the pool's queue and runs them
+  /// until it pops a NULL, which is the shutdown signal.
+  class Worker: public IThread {
+   public:
+    Worker(ThreadPool* pool): ptThreadPool_(pool) {
+      assert(ptThreadPool_);
+    }
+    virtual ~Worker() {
+    }
+
+    virtual void Run() {
+      while (true) {
+        ClosureInterface* closure = ptThreadPool_->queue_.Pop();
+        if (closure == NULL) {
+          break;
+        }
+        // Exceptions must not escape the thread entry point; log them and
+        // keep serving tasks.
+        try {
+          closure->Run();
+        } catch(std::exception& e) {
+          XLOG(ERROR) << e.what();
+        } catch(...) {
+          XLOG(ERROR) << " unknown exception.";
+        }
+        delete closure;
+      }
+    }
+   private:
+    ThreadPool * ptThreadPool_;
+  }; // class Worker
+
+  /// Creates `thread_num` workers (not yet running) and a queue of the
+  /// same capacity.
+  ThreadPool(size_t thread_num)
+    : threads_(thread_num),
+      queue_(thread_num),
+      started_(false) {
+    assert(thread_num);
+    for(size_t i = 0; i < threads_.size(); i ++) {
+      threads_[i] = new Worker(this);
+    }
+  }
+  ~ThreadPool() {
+    Stop();
+  }
+
+  /// Launches every worker thread.
+  void Start() {
+    for(size_t i = 0; i < threads_.size(); i++) {
+      threads_[i]->Start();
+    }
+    started_ = true;
+  }
+
+  /// Shuts the pool down and releases the workers. Safe to call more than
+  /// once, and safe when Start() was never called.
+  void Stop() {
+    // Only running workers can consume the NULL sentinels and be joined.
+    // Joining a worker that was never started would hand an uninitialized
+    // pthread_t to pthread_join, and pushing sentinels nobody pops could
+    // block on the bounded queue.
+    if(started_) {
+      for(size_t i = 0; i < threads_.size(); i ++) {
+        queue_.Push(NULL);
+      }
+      for(size_t i = 0; i < threads_.size(); i ++) {
+        threads_[i]->Join();
+      }
+      started_ = false;
+    }
+    for(size_t i = 0; i < threads_.size(); i ++) {
+      delete threads_[i];
+    }
+    threads_.clear();
+  }
+
+  /// Queues `task` for execution; the pool takes ownership of it.
+  void Add(ClosureInterface* task) {
+    assert(task);
+    queue_.Push(task);
+  }
+
+ private:
+  friend class Worker;
+
+  vector<IThread*> threads_;
+  BoundedBlockingQueue<ClosureInterface*> queue_;
+  bool started_;  // true between Start() and Stop()
+}; // class ThreadPool
+
+} // namespace limonp
+
+#endif // LIMONP_THREAD_POOL_HPP
--- a/libchinese-segmentation/cppjieba/limonp/limonp.pri
+++ b/libchinese-segmentation/cppjieba/limonp/limonp.pri
@ -0,0 +1,22 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/ArgvContext.hpp \
+    $$PWD/BlockingQueue.hpp \
+    $$PWD/BoundedBlockingQueue.hpp \
+    $$PWD/BoundedQueue.hpp \
+    $$PWD/Closure.hpp \
+    $$PWD/Colors.hpp \
+    $$PWD/Condition.hpp \
+    $$PWD/Config.hpp \
+    $$PWD/FileLock.hpp \
+    $$PWD/ForcePublic.hpp \
+    $$PWD/LocalVector.hpp \
+    $$PWD/Logging.hpp \
+    $$PWD/Md5.hpp \
+    $$PWD/MutexLock.hpp \
+    $$PWD/NonCopyable.hpp \
+    $$PWD/StdExtension.hpp \
+    $$PWD/StringUtil.hpp \
+    $$PWD/Thread.hpp \
+    $$PWD/ThreadPool.hpp
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include <cmath>
+#include "segment-trie.h"
+
+// Forwards the list of dictionary paths and the cache path to StorageBase,
+// then runs Init() (inherited; defined outside this file).
+// NOTE(review): PreLoad()/LoadDefaultDict()/LoadUserDict() in this file read
+// the hard-coded DICT_PATH / USER_DICT_PATH rather than `file_paths` --
+// confirm how StorageBase uses the paths.
+DictTrie::DictTrie(const vector<string> file_paths, string dat_cache_path)
+    : StorageBase<DatMemElem, false, DictCacheFileHeader>(file_paths, dat_cache_path)
+{
+    this->Init();
+}
+
+// Convenience constructor: packs the two dictionary paths into a vector
+// (main dictionary first, user dictionary second), forwards it together
+// with the cache path to StorageBase, then runs Init().
+DictTrie::DictTrie(const string &dict_path, const string &user_dict_paths, const string &dat_cache_path)
+    : StorageBase<DatMemElem, false, DictCacheFileHeader>(vector<string>{dict_path, user_dict_paths}, dat_cache_path)
+{
+    this->Init();
+}
+
+// Builds the binary cache file `dat_cache_file` from the text dictionaries.
+//
+// The data is written to a temporary file created next to the target
+// (name: dat_cache_file + "_XXXXXX") and handed to tryRename() at the end.
+// File layout, in write order:
+//   1. a DictCacheFileHeader (initially carrying only the md5),
+//   2. one DatMemElem per dictionary entry (default dict, then user dict),
+//   3. the trie array returned by GetDataTrieArray().
+// Afterwards the header fields that follow md5_hex are patched in place.
+//
+// @param dat_cache_file final path of the cache file.
+// @param md5            digest stored in the header; its length must equal
+//                       sizeof(header.md5_hex) (asserted).
+void DictTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    DictCacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    // umask() is process-wide state: apply the restrictive mask only while
+    // the temporary file is created and put the caller's mask back right
+    // after, instead of leaving it changed for the rest of the process.
+    const mode_t previous_mask = umask(S_IWGRP | S_IWOTH);
+    // mkstemp() rewrites the XXXXXX suffix in place, so it needs the
+    // writable buffer of the string rather than a cast of data().
+    const int fd = mkstemp(&tmp_filepath[0]);
+    umask(previous_mask);
+    assert(fd >= 0);
+    fchmod(fd, 0644);
+
+    write_bytes = write(fd, (const char *)&header, sizeof(DictCacheFileHeader));
+
+    // PreLoad() must run first: the loaders divide by m_freq_sum.
+    this->PreLoad();
+    this->LoadDefaultDict(fd, write_bytes, offset, elements_num);
+    this->LoadUserDict(fd, write_bytes, offset, elements_num);
+
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // Patch the header fields behind md5_hex now that the totals are known.
+    // NOTE(review): assumes the header stores an int-sized element count,
+    // element-area size and trie size followed directly by the double
+    // min_weight, with no padding -- confirm against CacheFileHeaderBase.
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
+    write(fd, &elements_num, sizeof(int));
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+    write(fd, &m_min_weight, sizeof(double));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(DictCacheFileHeader) + offset + this->GetDataTrieTotalSize());
+
+    tryRename(tmp_filepath, dat_cache_file);
+}
+
+// Looks up `key` with an exact-match search.
+// Returns a pointer into the element array at the index the search
+// reported, or nullptr when the search result is negative.
+const DatMemElem * DictTrie::Find(const string &key) const
+{
+    const int index = this->ExactMatchSearch(key.c_str(), key.size());
+    return (index < 0) ? nullptr : &this->GetElementPtr()[index];
+}
+
+
+
+// Builds, for every rune position in [begin, end), the list of dictionary
+// words that start at that position.
+//
+// `res` is resized to one DatDag per rune. For position i, nexts[0] is
+// always the single-rune step (i + 1); its element pointer stays nullptr
+// unless the dictionary contains that single rune. Every longer match of
+// at most `max_word_len` runes appends (i + length_in_runes, element).
+void DictTrie::FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<DatDag> &res, size_t max_word_len) const {
+
+    res.clear();
+    res.resize(end - begin);
+
+    // The trie is searched on the encoded text, so re-encode the runes.
+    string text_str;
+    EncodeRunesToString(begin, end, text_str);
+
+    // At most max_num prefix matches are collected per position.
+    static const size_t max_num = 128;
+    result_pair_type result_pairs[max_num] = {};
+
+    // i counts runes, begin_pos is the byte offset of rune i in text_str.
+    for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
+
+        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
+
+        // Default edge: advance by one rune, no dictionary element.
+        res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
+
+        for (std::size_t idx = 0; idx < num_results; ++idx) {
+            auto & match = result_pairs[idx];
+
+            // Skip values that are not a valid index into the element array.
+            if ((match.value < 0) || ((size_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) {
+                continue;
+            }
+
+            // match.length is in bytes; convert it to a rune count.
+            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+
+            if (char_num > max_word_len) {
+                continue;
+            }
+
+            const DatMemElem * pValue = &this->GetElementPtr()[match.value];
+
+            // A single-rune word fills in the default edge instead of
+            // adding a new one.
+            if (1 == char_num) {
+                res[i].nexts[0].second = pValue;
+                continue;
+            }
+
+            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
+        }
+
+        // Advance the byte offset by the encoded size of the current rune.
+        begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
+    }
+}
+
+// Segments the runes in [begin, end) into words by choosing, for every
+// position, the continuation with the highest accumulated weight, and
+// appends the resulting word ranges to `words`.
+//
+// The positions are processed from the last rune to the first. For each
+// position every dictionary word starting there (at most `max_word_len`
+// runes long) is a candidate; a position without any dictionary hit is
+// scored with GetMinWeight() as a single-rune word.
+//
+// The working arrays are std::vector rather than variable-length arrays
+// (a compiler extension whose stack usage grows with the input), and every
+// position starts out as a single-rune word so that `max_next` is never
+// read uninitialized when no candidate beats the initial weight.
+void DictTrie::FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange> &words, size_t max_word_len) const {
+
+    string text_str;
+    EncodeRunesToString(begin, end, text_str);
+
+    static const size_t max_num = 128;
+    result_pair_type result_pairs[max_num] = {}; // dictionary lookup results
+    const size_t str_size = end - begin;
+    // Best accumulated weight of the path starting at each position.
+    vector<double> max_weight(str_size, -3.14e+100);
+    // Position right after the best word starting at each position.
+    vector<size_t> max_next(str_size);
+    for (size_t i = 0; i < str_size; i++) {
+        max_next[i] = i + 1;
+    }
+
+    double val(0);
+    for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
+        size_t nextPos = str_size - i; // walk backwards: current position is nextPos - 1
+        begin_pos -= (end - i - 1)->len;
+
+        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
+        if (0 == num_results) { // not in the dictionary: treat the rune as a word of its own
+            val = GetMinWeight();
+            if (nextPos  < str_size) {
+                val += max_weight[nextPos];
+            }
+            if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
+                max_weight[nextPos - 1] = val;
+                max_next[nextPos - 1] = nextPos;
+            }
+        } else { // in the dictionary: keep the candidate with the highest accumulated weight
+            for (std::size_t idx = 0; idx < num_results; ++idx) {
+                auto & match = result_pairs[idx];
+                if ((match.value < 0) || ((uint32_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) {
+                    continue;
+                }
+                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
+                if (char_num > max_word_len) {
+                    continue;
+                }
+                auto * pValue = &this->GetElementPtr()[match.value];
+
+                val = pValue->weight;
+                if (1 == char_num) {
+                    if (nextPos  < str_size) {
+                        val += max_weight[nextPos];
+                    }
+                    if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
+                        max_weight[nextPos - 1] = val;
+                        max_next[nextPos - 1] = nextPos;
+                    }
+                } else {
+                    if (nextPos - 1 + char_num  < str_size) {
+                        val += max_weight[nextPos - 1 + char_num];
+                    }
+                    if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) {
+                        max_weight[nextPos - 1] = val;
+                        max_next[nextPos - 1] = nextPos - 1 + char_num;
+                    }
+                }
+            }
+        }
+    }
+    for (size_t i = 0; i < str_size;) { // follow the chosen path from the front and emit the words
+        assert(max_next[i] > i);
+        assert(max_next[i] <= str_size);
+        WordRange wr(begin + i, begin + max_next[i] - 1);
+        words.push_back(wr);
+        i = max_next[i];
+    }
+}
+
+// Returns true when `word` is one of the single-rune entries recorded from
+// the user dictionary by LoadUserDict().
+bool DictTrie::IsUserDictSingleChineseWord(const Rune &word) const {
+    const bool known = IsIn(m_user_dict_single_chinese_word, word);
+    return known;
+}
+
+// First pass over the default dictionary (DICT_PATH): adds the frequency
+// column (second field) of every well-formed line to m_freq_sum.
+// Empty lines, lines starting with '#', and lines that do not split into
+// exactly three space-separated fields are ignored.
+void DictTrie::PreLoad()
+{
+    ifstream dict_stream(DICT_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, " ");
+        if (fields.size() == 3) {
+            m_freq_sum += atof(fields[1].c_str());
+        }
+    }
+}
+
+// Second pass over the default dictionary (DICT_PATH): registers every
+// well-formed entry in the trie and appends its DatMemElem to `fd`.
+//
+// Each accepted line has three space-separated fields: word, frequency,
+// tag. The stored weight is log(frequency / m_freq_sum), so PreLoad() has
+// to run first. The smallest weight seen is tracked in m_min_weight.
+//
+// @param fd           descriptor of the cache file being written.
+// @param write_bytes  running total; increased by each write() result.
+// @param offset       running size of the element area, in bytes.
+// @param elements_num running element count; also the value stored in the
+//                     trie for the word.
+void DictTrie::LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dict_stream(DICT_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, " ");
+        if (fields.size() != 3) {
+            continue;
+        }
+
+        DatMemElem entry;
+        entry.weight = log(atof(fields[1].c_str()) / m_freq_sum);
+        entry.SetTag(fields[2]);
+
+        const string &word = fields[0];
+        this->Update(word.c_str(), word.size(), elements_num);
+        offset += (sizeof(DatMemElem));
+        elements_num++;
+
+        if (m_min_weight > entry.weight) {
+            m_min_weight = entry.weight;
+        }
+        write_bytes += write(fd, &entry, sizeof(DatMemElem));
+    }
+}
+
+// Loads the user dictionary (USER_DICT_PATH): registers every well-formed
+// entry in the trie and appends its DatMemElem to `fd`.
+//
+// Line format and skipping rules match the default dictionary (three
+// space-separated fields: word, frequency, tag), but the frequency is
+// parsed as an integer and m_min_weight is not updated here. Words that
+// consist of a single rune are additionally remembered in
+// m_user_dict_single_chinese_word.
+//
+// @param fd           descriptor of the cache file being written.
+// @param write_bytes  running total; increased by each write() result.
+// @param offset       running size of the element area, in bytes.
+// @param elements_num running element count; also the value stored in the
+//                     trie for the word.
+void DictTrie::LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dict_stream(USER_DICT_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, " ");
+        if (fields.size() != 3) {
+            continue;
+        }
+
+        DatMemElem entry;
+        assert(m_freq_sum > 0.0);
+        const int freq = atoi(fields[1].c_str());
+        entry.weight = log(1.0 * freq / m_freq_sum);
+        entry.SetTag(fields[2]);
+
+        const string &word = fields[0];
+        this->Update(word.c_str(), word.size(), elements_num);
+        offset += (sizeof(DatMemElem));
+        elements_num++;
+        write_bytes += write(fd, &entry, sizeof(DatMemElem));
+
+        if (Utf8CharNum(word) != 1) {
+            continue;
+        }
+        RuneArray runes;
+        if (DecodeRunesInString(word, runes)) {
+            m_user_dict_single_chinese_word.insert(runes[0]);
+        }
+    }
+}
+
+// Returns the min_weight value stored in the cache file header.
+inline double DictTrie::GetMinWeight() const
+{
+    const double weight = this->GetCacheFileHeaderPtr()->min_weight;
+    return weight;
+}
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef SegmentTrie_H
+#define SegmentTrie_H
+
+#include "storage-base.hpp"
+#include "cppjieba/Unicode.hpp"
+
+using namespace cppjieba;
+
+const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
+const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
+
+// Header of the dictionary cache file: the common header fields from
+// CacheFileHeaderBase plus the minimum word weight, which
+// DictTrie::LoadSourceFile() writes and DictTrie::GetMinWeight() reads back.
+struct DictCacheFileHeader : CacheFileHeaderBase
+{
+    double min_weight = 0;
+};
+
+// Dictionary trie used for word segmentation. Stores one DatMemElem per
+// dictionary word on top of StorageBase, which provides the trie search
+// primitives and the cache-file handling.
+class DictTrie : public StorageBase<DatMemElem, false, DictCacheFileHeader>
+{
+public:
+    // Construct from a list of dictionary files and an optional cache path.
+    DictTrie(const vector<string> file_paths, string dat_cache_path = "");
+    // Construct from a main dictionary, an optional user dictionary and an
+    // optional cache path.
+    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "");
+    // Builds the cache file `dat_cache_file` from the text dictionaries and
+    // stamps it with `md5`.
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
+
+    // Exact lookup of `key`; returns nullptr when it is not in the trie.
+    const DatMemElem *Find(const string &key) const;
+    // For every rune position, collects the dictionary words starting there.
+    void FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<struct DatDag>&res, size_t max_word_len = MAX_WORD_LENGTH) const;
+    // Segments the rune range into the highest-weight sequence of words and
+    // appends them to `words`.
+    void FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
+              vector<WordRange>& words, size_t max_word_len = MAX_WORD_LENGTH) const;
+    // True when `word` is a single-rune entry of the user dictionary.
+    bool IsUserDictSingleChineseWord(const Rune& word) const;
+
+private:
+    // Declared private; no definition is visible in segment-trie.cpp.
+    DictTrie();
+    // Sums the frequencies of the default dictionary into m_freq_sum.
+    void PreLoad();
+    // Write the entries of the default / user dictionary to the cache file
+    // `fd` and register them in the trie.
+    void LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
+    void LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
+    // Minimum weight as stored in the cache file header.
+    double GetMinWeight() const;
+
+    // Total frequency of the default dictionary (filled by PreLoad()).
+    double m_freq_sum = 0.0;
+    // Smallest weight seen while loading the default dictionary.
+    double m_min_weight = 3.14e+100;
+    // Single-rune words found in the user dictionary.
+    unordered_set<Rune> m_user_dict_single_chinese_word;
+};
+
+#endif // SegmentTrie_H
--- a/libchinese-segmentation/development-files/header-files/ChineseSegmentation
+++ b/libchinese-segmentation/development-files/header-files/ChineseSegmentation
@ -0,0 +1 @@
+#include "chinese-segmentation.h"
--- a/libchinese-segmentation/development-files/header-files/HanZiToPinYin
+++ b/libchinese-segmentation/development-files/header-files/HanZiToPinYin
@ -0,0 +1 @@
+#include "hanzi-to-pinyin.h"
--- a/libchinese-segmentation/dict/README.md
+++ b/libchinese-segmentation/dict/README.md
@ -0,0 +1,31 @@
+# CppJieba字典
+
+文件后缀名代表的是词典的编码方式。
+比如filename.utf8 是 utf8编码，filename.gbk 是 gbk编码方式。
+
+
+## 分词
+
+### jieba.dict.utf8/gbk
+
+作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
+
+### hmm_model.utf8/gbk
+
+作为隐马尔可夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
+
+__对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
+
+
+## 关键词抽取
+
+### idf.utf8
+
+IDF(Inverse Document Frequency)
+在KeywordExtractor中，使用的是经典的TF-IDF算法，所以需要这么一个词典提供IDF信息。
+
+### stop_words.utf8
+
+停用词词典
+
+
--- a/libchinese-segmentation/dict/hmm_model.utf8
+++ b/libchinese-segmentation/dict/hmm_model.utf8
--- a/libchinese-segmentation/dict/idf.utf8
+++ b/libchinese-segmentation/dict/idf.utf8
--- a/libchinese-segmentation/dict/jieba.dict.utf8
+++ b/libchinese-segmentation/dict/jieba.dict.utf8
--- a/libchinese-segmentation/dict/pinyinWithoutTone.txt
+++ b/libchinese-segmentation/dict/pinyinWithoutTone.txt
--- a/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
+++ b/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_start.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_start.utf8
@ -0,0 +1,259 @@
+#初始状态的概率
+#格式
+#状态:概率
+B,a:-4.7623052146
+B,ad:-6.68006603678
+B,ag:-3.14e+100
+B,an:-8.69708322302
+B,b:-5.01837436211
+B,bg:-3.14e+100
+B,c:-3.42388018495
+B,d:-3.97504752976
+B,df:-8.88897423083
+B,dg:-3.14e+100
+B,e:-8.56355183039
+B,en:-3.14e+100
+B,f:-5.49163041848
+B,g:-3.14e+100
+B,h:-13.53336513
+B,i:-6.11578472756
+B,in:-3.14e+100
+B,j:-5.05761912847
+B,jn:-3.14e+100
+B,k:-3.14e+100
+B,l:-4.90588358466
+B,ln:-3.14e+100
+B,m:-3.6524299819
+B,mg:-3.14e+100
+B,mq:-6.7869530014
+B,n:-1.69662577975
+B,ng:-3.14e+100
+B,nr:-2.23104959138
+B,nrfg:-5.87372217541
+B,nrt:-4.98564273352
+B,ns:-2.8228438315
+B,nt:-4.84609166818
+B,nz:-3.94698846058
+B,o:-8.43349870215
+B,p:-4.20098413209
+B,q:-6.99812385896
+B,qe:-3.14e+100
+B,qg:-3.14e+100
+B,r:-3.40981877908
+B,rg:-3.14e+100
+B,rr:-12.4347528413
+B,rz:-7.94611647157
+B,s:-5.52267359084
+B,t:-3.36474790945
+B,tg:-3.14e+100
+B,u:-9.1639172775
+B,ud:-3.14e+100
+B,ug:-3.14e+100
+B,uj:-3.14e+100
+B,ul:-3.14e+100
+B,uv:-3.14e+100
+B,uz:-3.14e+100
+B,v:-2.67405848743
+B,vd:-9.04472876024
+B,vg:-3.14e+100
+B,vi:-12.4347528413
+B,vn:-4.33156108902
+B,vq:-12.1470707689
+B,w:-3.14e+100
+B,x:-3.14e+100
+B,y:-9.84448567586
+B,yg:-3.14e+100
+B,z:-7.04568111149
+B,zg:-3.14e+100
+E,a:-3.14e+100
+E,ad:-3.14e+100
+E,ag:-3.14e+100
+E,an:-3.14e+100
+E,b:-3.14e+100
+E,bg:-3.14e+100
+E,c:-3.14e+100
+E,d:-3.14e+100
+E,df:-3.14e+100
+E,dg:-3.14e+100
+E,e:-3.14e+100
+E,en:-3.14e+100
+E,f:-3.14e+100
+E,g:-3.14e+100
+E,h:-3.14e+100
+E,i:-3.14e+100
+E,in:-3.14e+100
+E,j:-3.14e+100
+E,jn:-3.14e+100
+E,k:-3.14e+100
+E,l:-3.14e+100
+E,ln:-3.14e+100
+E,m:-3.14e+100
+E,mg:-3.14e+100
+E,mq:-3.14e+100
+E,n:-3.14e+100
+E,ng:-3.14e+100
+E,nr:-3.14e+100
+E,nrfg:-3.14e+100
+E,nrt:-3.14e+100
+E,ns:-3.14e+100
+E,nt:-3.14e+100
+E,nz:-3.14e+100
+E,o:-3.14e+100
+E,p:-3.14e+100
+E,q:-3.14e+100
+E,qe:-3.14e+100
+E,qg:-3.14e+100
+E,r:-3.14e+100
+E,rg:-3.14e+100
+E,rr:-3.14e+100
+E,rz:-3.14e+100
+E,s:-3.14e+100
+E,t:-3.14e+100
+E,tg:-3.14e+100
+E,u:-3.14e+100
+E,ud:-3.14e+100
+E,ug:-3.14e+100
+E,uj:-3.14e+100
+E,ul:-3.14e+100
+E,uv:-3.14e+100
+E,uz:-3.14e+100
+E,v:-3.14e+100
+E,vd:-3.14e+100
+E,vg:-3.14e+100
+E,vi:-3.14e+100
+E,vn:-3.14e+100
+E,vq:-3.14e+100
+E,w:-3.14e+100
+E,x:-3.14e+100
+E,y:-3.14e+100
+E,yg:-3.14e+100
+E,z:-3.14e+100
+E,zg:-3.14e+100
+M,a:-3.14e+100
+M,ad:-3.14e+100
+M,ag:-3.14e+100
+M,an:-3.14e+100
+M,b:-3.14e+100
+M,bg:-3.14e+100
+M,c:-3.14e+100
+M,d:-3.14e+100
+M,df:-3.14e+100
+M,dg:-3.14e+100
+M,e:-3.14e+100
+M,en:-3.14e+100
+M,f:-3.14e+100
+M,g:-3.14e+100
+M,h:-3.14e+100
+M,i:-3.14e+100
+M,in:-3.14e+100
+M,j:-3.14e+100
+M,jn:-3.14e+100
+M,k:-3.14e+100
+M,l:-3.14e+100
+M,ln:-3.14e+100
+M,m:-3.14e+100
+M,mg:-3.14e+100
+M,mq:-3.14e+100
+M,n:-3.14e+100
+M,ng:-3.14e+100
+M,nr:-3.14e+100
+M,nrfg:-3.14e+100
+M,nrt:-3.14e+100
+M,ns:-3.14e+100
+M,nt:-3.14e+100
+M,nz:-3.14e+100
+M,o:-3.14e+100
+M,p:-3.14e+100
+M,q:-3.14e+100
+M,qe:-3.14e+100
+M,qg:-3.14e+100
+M,r:-3.14e+100
+M,rg:-3.14e+100
+M,rr:-3.14e+100
+M,rz:-3.14e+100
+M,s:-3.14e+100
+M,t:-3.14e+100
+M,tg:-3.14e+100
+M,u:-3.14e+100
+M,ud:-3.14e+100
+M,ug:-3.14e+100
+M,uj:-3.14e+100
+M,ul:-3.14e+100
+M,uv:-3.14e+100
+M,uz:-3.14e+100
+M,v:-3.14e+100
+M,vd:-3.14e+100
+M,vg:-3.14e+100
+M,vi:-3.14e+100
+M,vn:-3.14e+100
+M,vq:-3.14e+100
+M,w:-3.14e+100
+M,x:-3.14e+100
+M,y:-3.14e+100
+M,yg:-3.14e+100
+M,z:-3.14e+100
+M,zg:-3.14e+100
+S,a:-3.90253968313
+S,ad:-11.0484584802
+S,ag:-6.95411391796
+S,an:-12.8402179494
+S,b:-6.47288876397
+S,bg:-3.14e+100
+S,c:-4.78696679586
+S,d:-3.90391976418
+S,df:-3.14e+100
+S,dg:-8.9483976513
+S,e:-5.94251300628
+S,en:-3.14e+100
+S,f:-5.19482024998
+S,g:-6.50782681533
+S,h:-8.65056320738
+S,i:-3.14e+100
+S,in:-3.14e+100
+S,j:-4.91199211964
+S,jn:-3.14e+100
+S,k:-6.94032059583
+S,l:-3.14e+100
+S,ln:-3.14e+100
+S,m:-3.26920065212
+S,mg:-10.8253149289
+S,mq:-3.14e+100
+S,n:-3.85514838976
+S,ng:-4.9134348611
+S,nr:-4.48366310396
+S,nrfg:-3.14e+100
+S,nrt:-3.14e+100
+S,ns:-3.14e+100
+S,nt:-12.1470707689
+S,nz:-3.14e+100
+S,o:-8.46446092775
+S,p:-2.98684018136
+S,q:-4.88865861826
+S,qe:-3.14e+100
+S,qg:-3.14e+100
+S,r:-2.76353367841
+S,rg:-10.2752685919
+S,rr:-3.14e+100
+S,rz:-3.14e+100
+S,s:-3.14e+100
+S,t:-3.14e+100
+S,tg:-6.27284253188
+S,u:-6.94032059583
+S,ud:-7.72823016105
+S,ug:-7.53940370266
+S,uj:-6.85251045118
+S,ul:-8.41537131755
+S,uv:-8.15808672229
+S,uz:-9.29925862537
+S,v:-3.05329230341
+S,vd:-3.14e+100
+S,vg:-5.94301818437
+S,vi:-3.14e+100
+S,vn:-11.4539235883
+S,vq:-3.14e+100
+S,w:-3.14e+100
+S,x:-8.42741965607
+S,y:-6.19707946995
+S,yg:-13.53336513
+S,z:-3.14e+100
+S,zg:-3.14e+100
--- a/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
--- a/libchinese-segmentation/dict/stop_words.utf8
+++ b/libchinese-segmentation/dict/stop_words.utf8
--- a/libchinese-segmentation/dict/user.dict.utf8
+++ b/libchinese-segmentation/dict/user.dict.utf8
@ -0,0 +1,4 @@
+云计算
+韩玉鉴赏
+蓝翔 nz
+区块链 10 nz
--- a/libchinese-segmentation/hanzi-to-pinyin-private.h
+++ b/libchinese-segmentation/hanzi-to-pinyin-private.h
@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef HANZITOPINYINPRIVATE_H
+#define HANZITOPINYINPRIVATE_H
+
+#include <QtCore/qglobal.h>
+#include <QHash>
+#include "pinyin4cpp_dictTrie.h"
+#include "hanzi-to-pinyin.h"
+#include "pinyin4cpp-trie.h"
+
+using namespace std;
+
+// Lookup table from a tone-marked pinyin letter to "<base letter><tone digit>"
+// (for example "ā" -> "a1"). The bare "ü" maps to "v" with no tone digit, so its
+// value is only one character long.
+// Declared `static` in a header: every translation unit that includes this file
+// gets its own copy of the table.
+// NOTE(review): any key that is encoded as a base letter followed by a combining
+// diacritic is more than one UTF-16 code unit and cannot be matched by a lookup
+// made with a single QChar — confirm the encoding of the m/ê entries.
+static const QHash<QString, QString> PhoneticSymbol = {
+    {"ā", "a1"}, {"á", "a2"}, {"ǎ", "a3"}, {"à", "a4"},
+    {"ē", "e1"}, {"é", "e2"}, {"ě", "e3"}, {"è", "e4"},
+    {"ō", "o1"}, {"ó", "o2"}, {"ǒ", "o3"}, {"ò", "o4"},
+    {"ī", "i1"}, {"í", "i2"}, {"ǐ", "i3"}, {"ì", "i4"},
+    {"ū", "u1"}, {"ú", "u2"}, {"ǔ", "u3"}, {"ù", "u4"},
+    // üe
+    {"ü", "v"},
+    {"ǖ", "v1"}, {"ǘ", "v2"}, {"ǚ", "v3"}, {"ǜ", "v4"},
+    {"ń", "n2"}, {"ň", "n3"}, {"ǹ", "n4"},
+    {"m̄", "m1"}, {"ḿ", "m2"}, {"m̀", "m4"},
+    {"ê̄", "ê1"}, {"ế", "ê2"}, {"ê̌", "ê3"}, {"ề", "ê4"}
+};
+
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+/**
+ * Implementation object behind HanZiToPinYin: owns the pinyin trie and the
+ * four configuration values that control how results are produced.
+ */
+class PINYINMANAGER_EXPORT HanZiToPinYinPrivate
+{
+public:
+    // parent is stored in q.
+    HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr);
+    ~HanZiToPinYinPrivate();
+
+public:
+    // Forwards t, with its value category preserved, to Pinyin4cppTrie::IsMultiTone.
+    template <typename T>
+    bool isMultiTone(T &&t) {return m_pinYinTrie.IsMultiTone(std::forward<T>(t));}
+
+    // Returns whatever Pinyin4cppTrie::Contains reports for word.
+    bool contains(string &word);
+    // Clears results, then fills it with the pinyin of word; returns 0, or -1 when
+    // word has no direct entry and segmentation is disabled.
+    int getResults(string &word, QStringList &results);
+    // Stores the four option values used by getResults()/convertDataStyle().
+    void setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType);
+
+private:
+    // Rewrites results in place according to m_pinyinDataStyle.
+    void convertDataStyle(QStringList &results);
+
+    HanZiToPinYin *q = nullptr;
+    //Pinyin4cppDictTrie *m_pinYinTrie = nullptr;
+    Pinyin4cppTrie m_pinYinTrie;
+
+    // Defaults: segmentation on, polyphones off, default output style, default handling
+    // of text that has no pinyin.
+    SegType m_segType = SegType::Segmentation;
+    PolyphoneType m_polyphoneType = PolyphoneType::Disable;
+    PinyinDataStyle m_pinyinDataStyle = PinyinDataStyle::Default;
+    ExDataProcessType m_exDataProcessType = ExDataProcessType::Default;
+};
+#endif // HANZITOPINYINPRIVATE_H
--- a/libchinese-segmentation/hanzi-to-pinyin.cpp
+++ b/libchinese-segmentation/hanzi-to-pinyin.cpp
@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include <mutex>
+#include <cctype>
+#include "hanzi-to-pinyin.h"
+#include "hanzi-to-pinyin-private.h"
+#include "chinese-segmentation.h"
+#include "cppjieba/Unicode.hpp"
+
+// Singleton instance; stays nullptr until getInstance() creates it.
+HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr;
+// Guards the one-time creation performed in getInstance().
+std::once_flag g_singleFlag;
+
+// Reports whether the pinyin trie has an entry for word.
+bool HanZiToPinYinPrivate::contains(string &word)
+{
+    const bool known = m_pinYinTrie.Contains(word);
+    return known;
+}
+
+/**
+ * Appends the pinyin of a single character to results.
+ *
+ * @param pinyin        trie entry for the character; alternative readings are separated by ','
+ * @param polyphoneType Enable keeps the whole entry, Disable keeps only the first reading
+ * @param results       list that receives the entry
+ */
+static void appendSingleCharPinyin(const std::string &pinyin, PolyphoneType polyphoneType, QStringList &results)
+{
+    if (polyphoneType == PolyphoneType::Enable) {
+        results.append(QString::fromStdString(pinyin));
+    } else if (polyphoneType == PolyphoneType::Disable) {
+        // find() yields npos when there is no ',', and substr(0, npos) is the whole string.
+        results.append(QString::fromStdString(pinyin.substr(0, pinyin.find(','))));
+    }
+}
+
+/**
+ * Appends the pinyin of a multi-character word to results, one entry per syllable.
+ *
+ * @param pinyin        trie entry for the word; syllables are separated by ',' and
+ *                      alternative readings of the whole word by '/'
+ * @param polyphoneType Enable joins the i-th syllable of every reading with '/',
+ *                      Disable uses the first reading only
+ * @param results       list that receives the entries
+ */
+static void appendWordPinyin(const std::string &pinyin, PolyphoneType polyphoneType, QStringList &results)
+{
+    const std::vector<std::string> readings = limonp::Split(pinyin, "/");
+    if (readings.empty()) {
+        return;
+    }
+    if (readings.size() == 1) {
+        // Only one reading: emit its syllables as they are.
+        for (const std::string &syllable : limonp::Split(pinyin, ",")) {
+            results.append(QString::fromStdString(syllable));
+        }
+        return;
+    }
+
+    if (polyphoneType == PolyphoneType::Enable) {
+        // Split every reading once instead of once per emitted syllable.
+        std::vector<std::vector<std::string>> syllablesPerReading;
+        syllablesPerReading.reserve(readings.size());
+        for (const std::string &reading : readings) {
+            syllablesPerReading.push_back(limonp::Split(reading, ","));
+        }
+        const size_t wordSize = syllablesPerReading.front().size();
+        for (size_t i = 0; i < wordSize; ++i) {
+            QStringList alternatives;
+            for (const std::vector<std::string> &syllables : syllablesPerReading) {
+                // A reading with fewer syllables than the first one contributes nothing here
+                // (indexing it unconditionally would read out of range).
+                if (i < syllables.size()) {
+                    alternatives.append(QString::fromStdString(syllables[i]));
+                }
+            }
+            results.append(alternatives.join('/'));
+        }
+    } else if (polyphoneType == PolyphoneType::Disable) {
+        for (const std::string &syllable : limonp::Split(readings.front(), ",")) {
+            results.append(QString::fromStdString(syllable));
+        }
+    }
+}
+
+/**
+ * @brief Collects the pinyin of word into results.
+ *
+ * results is cleared first. If the trie has an entry for the whole word it is used
+ * directly. Otherwise, when segmentation is enabled, the word is cut with
+ * ChineseSegmentation and every segment is looked up; a segment without an entry
+ * is looked up character by character. Text that has no pinyin at all is appended
+ * unchanged in ExDataProcessType::Default mode and dropped otherwise. Finally the
+ * collected entries are rewritten by convertDataStyle().
+ *
+ * @param word    character/word/sentence to convert
+ * @param results receives the pinyin entries
+ * @return -1 when word has no direct entry and segmentation is disabled, 0 otherwise
+ *         (also when results ends up empty)
+ */
+int HanZiToPinYinPrivate::getResults(string &word, QStringList &results)
+{
+    results.clear();
+
+    const string directResult = m_pinYinTrie.Find(word);
+    if (!directResult.empty()) {
+        // The whole input is a known entry.
+        if (cppjieba::IsSingleWord(word)) {
+            appendSingleCharPinyin(directResult, m_polyphoneType, results);
+        } else {
+            appendWordPinyin(directResult, m_polyphoneType, results);
+        }
+        convertDataStyle(results);
+        return 0;
+    }
+
+    if (m_segType == SegType::NoSegmentation) {
+        // No entry and segmentation is off: nothing more to try.
+        return -1;
+    }
+
+    vector<string> segResults = ChineseSegmentation::getInstance()->callMixSegmentCutStr(word);
+    string data;
+    for (string &info : segResults) {
+        if (info.empty()) {
+            continue;
+        }
+
+        data = m_pinYinTrie.Find(info);
+        if (!data.empty()) {
+            // The segment itself is a known entry.
+            if (cppjieba::IsSingleWord(info)) {
+                appendSingleCharPinyin(data, m_polyphoneType, results);
+            } else {
+                appendWordPinyin(data, m_polyphoneType, results);
+            }
+            continue;
+        }
+
+        if (cppjieba::IsSingleWord(info)) {
+            // Single character without pinyin: keep it only in Default mode.
+            if (m_exDataProcessType == ExDataProcessType::Default) {
+                results.append(QString::fromStdString(info));
+            }
+            continue;
+        }
+
+        // Unknown multi-character segment: fall back to one lookup per character.
+        string oneWord;
+        cppjieba::RuneStrArray runeArray;
+        cppjieba::DecodeRunesInString(info, runeArray);
+        for (auto i = runeArray.begin(); i != runeArray.end(); ++i) {
+            oneWord = cppjieba::GetStringFromRunes(info, i, i);
+            data = m_pinYinTrie.Find(oneWord);
+            if (data.empty()) {
+                if (m_exDataProcessType == ExDataProcessType::Default) {
+                    results.append(QString::fromStdString(oneWord));
+                }
+                // Nothing was found for this character, so skip the pinyin handling below;
+                // falling through would append the empty lookup result as an extra entry.
+                continue;
+            }
+            appendSingleCharPinyin(data, m_polyphoneType, results);
+        }
+    }
+
+    convertDataStyle(results);
+    return 0;//todo
+}
+
+// Records the four option values; they are read by getResults() and convertDataStyle().
+void HanZiToPinYinPrivate::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType)
+{
+    // Plain member stores, independent of each other.
+    m_segType = segType;
+    m_polyphoneType = polyphoneType;
+    m_exDataProcessType = processType;
+    m_pinyinDataStyle = dataStyle;
+}
+
+/**
+ * @brief Rewrites every entry of results in place according to m_pinyinDataStyle.
+ *
+ * Entries arrive with tone marks. Inside one entry ',' separates alternative
+ * readings of a character and '/' separates alternative readings of a word.
+ *  - Default:     tone marks are replaced by the base letter, duplicate readings removed.
+ *  - Tone:        entries are left untouched.
+ *  - Tone2:       each tone-marked letter becomes "<letter><digit>" at the same position.
+ *  - Tone3:       each tone-marked letter becomes its base letter and the digit is moved
+ *                 to the end of the reading it belongs to.
+ *  - FirstLetter: each reading is reduced to its first letter.
+ *  - English:     not supported, entries are left untouched.
+ *
+ * @param results entries to convert
+ */
+void HanZiToPinYinPrivate::convertDataStyle(QStringList &results)
+{
+    // isalpha() is only defined for values representable as unsigned char (or EOF), while
+    // QChar::toLatin1() returns a plain char that can be negative, so convert first.
+    const auto isLatinLetter = [](const QChar &ch) {
+        return isalpha(static_cast<unsigned char>(ch.toLatin1())) != 0;
+    };
+
+    QString value;
+    if (m_pinyinDataStyle == PinyinDataStyle::Default) {
+        for (QString &info : results) {
+            if (info == ",") {
+                continue;
+            }
+            // Only one-for-one character replacements happen here, so the length of info
+            // never changes while it is being walked.
+            for (int i = 0; i < info.size(); ++i) {
+                const QChar c = info.at(i);
+                if (!isLatinLetter(c)) {
+                    value = PhoneticSymbol.value(c);
+                    if (!value.isEmpty()) {
+                        info.replace(c, value.at(0));
+                    }
+                }
+            }
+
+            // Remove duplicate readings while keeping their original order.
+            QStringList tmpList = info.split(',', QString::SkipEmptyParts);
+            QStringList tmpValue;
+            for (auto &str : tmpList) {
+                if (!tmpValue.contains(str)) {
+                    tmpValue.push_back(str);
+                }
+            }
+            info = tmpValue.join(",");
+        }
+    } else if (m_pinyinDataStyle == PinyinDataStyle::Tone) {
+        // Nothing to do: entries already carry tone marks.
+    } else if (m_pinyinDataStyle == PinyinDataStyle::Tone2) {
+        for (QString &info : results) {
+            for (int i = 0; i < info.size();) {
+                const QChar c = info.at(i);
+                if (!isLatinLetter(c)) {
+                    value = PhoneticSymbol.value(c);
+                    if (!value.isEmpty()) {
+                        // Replaces every occurrence of c, then steps over the inserted text.
+                        info.replace(c, value);
+                        i += value.size();
+                        continue;
+                    }
+                }
+                i++;
+            }
+        }
+    } else if (m_pinyinDataStyle == PinyinDataStyle::Tone3) {
+        for (QString &info : results) {
+            if (info == "/") {
+                continue;
+            }
+            // Word-level alternatives use '/'; treat them like ',' while converting and
+            // restore the separator once the entry is done.
+            bool isPolyphoneWords(false);
+            if (info.contains("/")) {
+                isPolyphoneWords = true;
+                info.replace("/", ",");
+            }
+
+            for (int i = 0; i < info.size();) {
+                const QChar c = info.at(i);
+                if (isLatinLetter(c)) {
+                    i++;
+                    continue;
+                }
+                value = PhoneticSymbol.value(c);
+                if (value.isEmpty()) {
+                    i++;
+                    continue;
+                }
+                info.replace(i, 1, value.at(0));
+                if (value.size() < 2) {
+                    // Mapping without a tone digit (bare "ü" -> "v"): there is no digit to
+                    // place, and reading value.at(1) would be out of range.
+                    i++;
+                    continue;
+                }
+                const int pos = info.indexOf(',', i);
+                if (pos == -1) {
+                    // Last reading of the entry: the digit goes to the very end.
+                    info.append(value.at(1));
+                    break;
+                }
+                // Put the digit right before the separator, then continue with the first
+                // character after the separator (which the insert shifted by one).
+                info.insert(pos, value.at(1));
+                i = pos + 2;
+            }
+
+            // Restored after the loop so that entries with more than two readings, or with
+            // no tone mark at all, also get their '/' separators back.
+            if (isPolyphoneWords) {
+                info.replace(",", "/");
+            }
+        }
+    } else if (m_pinyinDataStyle == PinyinDataStyle::FirstLetter) {
+        for (QString &info : results) {
+            if (info == "," or info == "/") {
+                continue;
+            }
+
+            bool isPolyphoneWords(false);
+            if (info.contains("/")) {
+                isPolyphoneWords = true;
+                info.replace("/", ",");
+            }
+
+            for (int i = 0; i < info.size(); i++) {
+                const QChar c = info.at(i);
+                if (!isLatinLetter(c)) {
+                    value = PhoneticSymbol.value(c);
+                    if (!value.isEmpty()) {
+                        info.replace(c, value.at(0));
+                    }
+                }
+            }
+
+            // NOTE(review): this compares the full reading against a list that holds only
+            // first letters, so it removes duplicates only for one-letter readings. Kept as
+            // is because callers may rely on one letter per reading — confirm the intent.
+            QStringList tmpList = info.split(',', QString::SkipEmptyParts);
+            QStringList tmpValue;
+            for (auto &str : tmpList) {
+                if (!tmpValue.contains(str)) {
+                    tmpValue.push_back(str.at(0));
+                }
+            }
+            if (isPolyphoneWords) {
+                info = tmpValue.join("/");
+            } else {
+                info = tmpValue.join(",");
+            }
+        }
+    } else if (m_pinyinDataStyle == PinyinDataStyle::English) {
+        // Not supported yet.
+    }
+}
+
+// Stores parent in q; m_pinYinTrie is a value member and is default-constructed.
+// The commented-out lines are the earlier heap-allocated trie set-up.
+HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent)
+{
+    //const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt";
+    //const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt";
+    //m_pinYinTrie = new Pinyin4cppDictTrie(SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH);
+    //m_pinYinTrie = new Pinyin4cppTrie;
+}
+
+// Nothing to release explicitly: m_pinYinTrie is a value member. The commented-out
+// code belongs to the earlier pointer-based trie.
+HanZiToPinYinPrivate::~HanZiToPinYinPrivate()
+{
+//    if (m_pinYinTrie){
+//        delete m_pinYinTrie;
+//        m_pinYinTrie = nullptr;
+//    }
+}
+
+// Returns the process-wide instance, creating it on the first call.
+HanZiToPinYin * HanZiToPinYin::getInstance()
+{
+    // call_once runs the creator a single time, however many callers arrive.
+    const auto createInstance = []() {
+        g_pinYinManager = new HanZiToPinYin;
+    };
+    call_once(g_singleFlag, createInstance);
+    return g_pinYinManager;
+}
+
+// Forwards to the private implementation.
+bool HanZiToPinYin::contains(string &word)
+{
+    const bool known = d->contains(word);
+    return known;
+}
+
+// The four overloads accept every value category of string; each hands the
+// (named, hence lvalue) argument to the private implementation.
+bool HanZiToPinYin::isMultiTone(string &word)
+{
+    const bool multiTone = d->isMultiTone(word);
+    return multiTone;
+}
+
+bool HanZiToPinYin::isMultiTone(string &&word)
+{
+    const bool multiTone = d->isMultiTone(word);
+    return multiTone;
+}
+
+bool HanZiToPinYin::isMultiTone(const string &word)
+{
+    const bool multiTone = d->isMultiTone(word);
+    return multiTone;
+}
+
+bool HanZiToPinYin::isMultiTone(const string &&word)
+{
+    const bool multiTone = d->isMultiTone(word);
+    return multiTone;
+}
+
+// word is taken by value, so the implementation receives a private, modifiable copy.
+int HanZiToPinYin::getResults(string word, QStringList &results)
+{
+    const int status = d->getResults(word, results);
+    return status;
+}
+
+// Passes the four option values on to the private implementation unchanged.
+void HanZiToPinYin::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType)
+{
+    d->setConfig(dataStyle,
+                 segType,
+                 polyphoneType,
+                 processType);
+}
+
+// Creates the private implementation with its default (null) parent.
+HanZiToPinYin::HanZiToPinYin()
+    : d{new HanZiToPinYinPrivate}
+{
+}
--- a/libchinese-segmentation/hanzi-to-pinyin.h
+++ b/libchinese-segmentation/hanzi-to-pinyin.h
@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef HANZITOPINYIN_H
+#define HANZITOPINYIN_H
+
+#include <QtCore/qglobal.h>
+#include <QStringList>
+#include "pinyin4cpp-common.h"
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+using namespace std;
+
+class HanZiToPinYinPrivate;
+/**
+ * Singleton facade for converting Chinese text to pinyin. All work is delegated
+ * to the HanZiToPinYinPrivate object held in d.
+ */
+class PINYINMANAGER_EXPORT HanZiToPinYin
+{
+public:
+    // Returns the single shared instance.
+    static HanZiToPinYin * getInstance();
+
+public:
+    /**
+     * @brief HanZiToPinYin::isMultiTone Checks whether a character/word/sentence has more than one reading
+     * @param word the character/word/sentence to check
+     * @return bool false if it does not
+     */
+    bool isMultiTone(string &word);
+    bool isMultiTone(string &&word);
+    bool isMultiTone(const string &word);
+    bool isMultiTone(const string &&word);
+
+    /**
+     * @brief HanZiToPinYin::contains Checks whether a character/word/sentence has pinyin (i.e. is present in the database)
+     * @param word the character/word/sentence to look up
+     * @return bool false if the database does not contain it
+     */
+    bool contains(string &word);
+
+    /**
+     * @brief HanZiToPinYin::getResults Gets the pinyin of a character/word/sentence
+     * @param word the character/word/sentence to convert
+     * @param results pinyin list for word (may contain polyphones); cleared on every call
+     * @return int 0 if pinyin was obtained, otherwise -1
+     */
+    int getResults(string word, QStringList &results);
+
+    /**
+     * @brief setConfig Configures the behaviour of HanZiToPinYin; see pinyin4cpp-common.h
+     * @param dataStyle style of the returned data (default: Default)
+     * @param segType whether word segmentation is used (default: enabled)
+     * @param polyphoneType whether polyphones are returned (default: disabled)
+     * @param processType how text without pinyin data is handled (default: Default)
+     */
+    void setConfig(PinyinDataStyle dataStyle,SegType segType,PolyphoneType polyphoneType,ExDataProcessType processType);
+
+protected:
+    // Construction and destruction are not public and copying is disabled:
+    // instances are obtained through getInstance().
+    HanZiToPinYin();
+    ~HanZiToPinYin();
+    HanZiToPinYin(const HanZiToPinYin&) = delete;
+    HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
+private:
+    static HanZiToPinYin *g_pinYinManager;
+    HanZiToPinYinPrivate *d = nullptr;
+};
+
+#endif // HANZITOPINYIN_H
--- a/libchinese-segmentation/libchinese-segmentation.pro
+++ b/libchinese-segmentation/libchinese-segmentation.pro
@ -0,0 +1,84 @@
+QT -= gui
+
+VERSION = 1.1.0
+TARGET =  chinese-segmentation
+TEMPLATE = lib
+DEFINES += LIBCHINESESEGMENTATION_LIBRARY
+DEFINES += VERSION='\\"$${VERSION}\\"'
+
+CONFIG += c++11 create_pc create_prl no_install_prl
+
+# The following define makes your compiler emit warnings if you use
+# any Qt feature that has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
+#QMAKE_CXXFLAGS += -Werror=uninitialized
+QMAKE_CXXFLAGS += -execution-charset:utf-8
+
+# You can also make your code fail to compile if it uses deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+include(cppjieba/cppjieba.pri)
+include(pinyin4cpp/pinyin4cpp.pri)
+include(Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri)
+include(storage-base/storage-base-cedar.pri)
+
+#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
+
+SOURCES += \
+    chinese-segmentation.cpp \
+    hanzi-to-pinyin.cpp \
+    Traditional-to-Simplified.cpp
+
+HEADERS += \
+    chinese-segmentation-private.h \
+    chinese-segmentation.h \
+    common-struct.h \
+    hanzi-to-pinyin-private.h \
+    hanzi-to-pinyin.h \
+    Traditional-to-Simplified-private.h \
+    Traditional-to-Simplified.h \
+    pinyin4cpp-common.h \
+    libchinese-segmentation_global.h
+
+# Dictionary data installed next to the library's other resources.
+# Each glob gets its own statement: ending these lines with "\" would join the
+# following "dict_files.files +=" text into the value, adding the literal words
+# "dict_files.files" and "+=" to the list of files to install.
+dict_files.path = /usr/share/ukui-search/res/dict/
+dict_files.files = $$PWD/dict/*.utf8
+dict_files.files += $$PWD/dict/pos_dict/*.utf8
+dict_files.files += $$PWD/dict/*.txt
+dict_files.files += $$PWD/pinyin4cpp/dict/*.txt
+dict_files.files += $$PWD/Traditional-Chinese-Simplified-conversion/dict/*.txt
+
+INSTALLS += \
+    dict_files \
+
+# Default rules for deployment.
+unix {
+    target.path = $$[QT_INSTALL_LIBS]
+    QMAKE_PKGCONFIG_NAME = chinese-segmentation
+    QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
+    QMAKE_PKGCONFIG_VERSION = $$VERSION
+    QMAKE_PKGCONFIG_LIBDIR = $$target.path
+    QMAKE_PKGCONFIG_DESTDIR = pkgconfig
+    QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
+    QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
+
+!isEmpty(target.path): INSTALLS += target
+
+    header.path = /usr/include/chinese-seg
+    header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h pinyin4cpp-common.h Traditional-to-Simplified.h
+    header.files += development-files/header-files/*
+#    headercppjieba.path = /usr/include/chinese-seg/cppjieba/
+#    headercppjieba.files = cppjieba/*
+    INSTALLS += header
+}
+
+
+#DISTFILES += \
+#    jiaba/jieba.pri
+
+DISTFILES += \
+    development-files/header-files/* \
+    pinyin4cpp/pinyin4cpp.pri
--- a/libchinese-segmentation/libchinese-segmentation_global.h
+++ b/libchinese-segmentation/libchinese-segmentation_global.h
@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2020, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: zhangzihao <zhangzihao@kylinos.cn>
+ * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
+ *
+ */
+#ifndef CHINESESEGMENTATION_GLOBAL_H
+#define CHINESESEGMENTATION_GLOBAL_H
+
+#include <QtCore/qglobal.h>
+
+/*
+ * Export when building the library, import when using it. The qmake project
+ * defines LIBCHINESESEGMENTATION_LIBRARY, so that spelling is accepted as well
+ * as CHINESESEGMENTATION_LIBRARY; testing only the latter never selects the
+ * export branch in a qmake build.
+ */
+#if defined(CHINESESEGMENTATION_LIBRARY) || defined(LIBCHINESESEGMENTATION_LIBRARY)
+#  define CHINESESEGMENTATION_EXPORT Q_DECL_EXPORT
+#else
+#  define CHINESESEGMENTATION_EXPORT Q_DECL_IMPORT
+#endif
+
+#endif // CHINESESEGMENTATION_GLOBAL_H
--- a/libchinese-segmentation/pinyin4cpp-common.h
+++ b/libchinese-segmentation/pinyin4cpp-common.h
@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#ifndef PINYIN4CPP_COMMON_H
+#define PINYIN4CPP_COMMON_H
+
+/**
+ * @brief The PinyinDataStyle enum
+ * Default      plain mode:               "中心" returns "zhong xin"
+ * Tone         tone-mark mode:           "中心" returns "zhōng xīn"
+ * Tone2        tone-number mode 2:       "中心" returns "zho1ng xi1n"
+ * Tone3        tone-number mode 3:       "中心" returns "zhong1 xin1"
+ * FirstLetter  first-letter mode:        "中心" returns "z x"
+ * English      English translation mode (not supported yet):
+ *                                        "中心" returns "center,heart,core"
+ */
+enum class PinyinDataStyle {
+    Default       = 1u << 0,
+    Tone          = 1u << 1,
+    Tone2         = 1u << 2,
+    Tone3         = 1u << 3,
+    FirstLetter   = 1u << 4,
+    English       = 1u << 5
+};
+
+/**
+ * @brief The SegType enum
+ * Segmentation    segment the input into words (default): "银河麒麟" -> "银河" "麒麟"
+ * NoSegmentation  no word segmentation:                   "银河麒麟"
+ */
+enum class SegType {
+    Segmentation    = 1u << 0,
+    NoSegmentation  = 1u << 1
+};
+
+/**
+ * @brief The PolyphoneType enum
+ * Disable  polyphones disabled (default): "奇安信" returns "qi an xin"; a
+ *          polyphonic character is returned with its most common reading
+ * Enable   polyphones enabled: "奇安信" returns "qi,ji an xin"
+ * Note: polyphonic words are returned in the form "朝阳" -> "zhao/chao yang/yang"
+ */
+enum class PolyphoneType {
+    Disable       = 1u << 0,
+    Enable        = 1u << 1
+};
+
+/**
+ * @brief The ExDataProcessType enum
+ * Default  data that has no pinyin is returned unchanged:
+ *          "123木头人" returns "123 mu tou ren" (segmentation mode)
+ * Delete   data that has no pinyin is removed:
+ *          "123木头人" returns "mu tou ren" (segmentation mode)
+ */
+enum class ExDataProcessType {
+    Default       = 1u << 0,
+    Delete        = 1u << 1
+};
+
+#endif //PINYIN4CPP_COMMON_H
--- a/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#include "pinyin4cpp-trie.h"
+
+/**
+ * Passes the two built-in dictionary paths (SINGLE_WORD_PINYIN_PATH and
+ * WORDS_PINYIN_PATH) plus the cache path to the StorageBase constructor, then
+ * calls Init().
+ * @param dat_cache_path cache file location, forwarded unchanged to StorageBase
+ */
+Pinyin4cppTrie::Pinyin4cppTrie(string dat_cache_path)
+    : StorageBase<char, false, CacheFileHeaderBase>(
+          vector<string>{SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH},
+          dat_cache_path)
+{
+    Init();
+}
+
+/**
+ * Passes a caller-supplied list of source files plus the cache path to the
+ * StorageBase constructor, then calls Init().
+ * @param file_paths     source file list, forwarded unchanged to StorageBase
+ * @param dat_cache_path cache file location, forwarded unchanged to StorageBase
+ */
+Pinyin4cppTrie::Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path)
+    : StorageBase<char, false, CacheFileHeaderBase>(file_paths, dat_cache_path)
+{
+    Init();
+}
+
+
+
+/**
+ * @return true when Find() yields a non-empty value for @p word.
+ */
+bool Pinyin4cppTrie::Contains(string &word) {
+    return !this->Find(word).empty();
+}
+
+/**
+ * @brief Reports whether the value stored for @p word lists more than one reading.
+ *
+ * Alternative readings of a character are comma-separated (e.g. "qi,ji"), so a
+ * comma in the stored value marks a polyphone. The previous implementation had
+ * the test inverted and returned true for every word WITHOUT a comma, unknown
+ * words included.
+ *
+ * @param word key to look up
+ * @return true if the stored value contains ','; false otherwise, including
+ *         when Find() returns an empty string because @p word is unknown.
+ * NOTE(review): polyphonic words are described as using '/' between readings -
+ * confirm whether that separator should be detected here as well.
+ */
+bool Pinyin4cppTrie::IsMultiTone(const string &word) {
+    const string result = this->Find(word);
+    return result.find(',') != string::npos;
+}
+
+/**
+ * Writes a new cache file for the pinyin dictionaries.
+ *
+ * Written in order: a CacheFileHeaderBase, the '\0'-terminated values emitted
+ * by LoadSingleWordDict() and LoadWordsDict(), then the trie array. Afterwards
+ * three ints (element count, element-area size, trie size) are written right
+ * after the md5_hex field. Everything goes to a mkstemp() temporary named after
+ * @p dat_cache_file, which is finally handed to tryRename().
+ *
+ * @param dat_cache_file destination path of the cache file
+ * @param md5            digest copied into the header; its length must equal
+ *                       sizeof(header.md5_hex)
+ */
+void Pinyin4cppTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
+{
+    CacheFileHeaderBase header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    umask(S_IWGRP | S_IWOTH);
+    const int fd = mkstemp((char *)tmp_filepath.data());
+    assert(fd >= 0);
+    if (fd < 0) {
+        // assert() is compiled out under NDEBUG; without this guard the code
+        // below would write to an invalid descriptor and then try to rename a
+        // temporary file that was never created.
+        return;
+    }
+    fchmod(fd, 0644);
+
+    write_bytes = write(fd, (const char *)&header, sizeof(CacheFileHeaderBase));
+
+    // Both loaders append their values to fd and register the keys in the trie.
+    this->LoadSingleWordDict(fd, write_bytes, offset, elements_num);
+    this->LoadWordsDict(fd, write_bytes, offset, elements_num);
+
+    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
+
+    // Go back and fill in the counters that follow md5_hex in the header.
+    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
+    write(fd, &elements_num, sizeof(int));
+    write(fd, &offset, sizeof(int));
+    data_trie_size = this->GetDataTrieSize();
+    write(fd, &data_trie_size, sizeof(int));
+
+    close(fd);
+    assert((size_t)write_bytes == sizeof(CacheFileHeaderBase) + offset + this->GetDataTrieTotalSize());
+
+    tryRename(tmp_filepath, dat_cache_file);
+}
+
+/**
+ * Looks @p key up with ExactMatchSearch().
+ * @return the '\0'-terminated value found at the returned position of the
+ *         element area, or an empty string when the search result is negative.
+ */
+string Pinyin4cppTrie::Find(const string &key)
+{
+    const int element_offset = this->ExactMatchSearch(key.c_str(), key.size());
+    return (element_offset < 0) ? string()
+                                : string(&this->GetElementPtr()[element_offset]);
+}
+
+/**
+ * Reads SINGLE_WORD_PINYIN_PATH line by line. Each usable line has exactly
+ * three ':'-separated columns: the third is registered as the trie key at the
+ * current offset, the second is appended to @p fd including its '\0'.
+ * Blank lines, lines starting with '#' and lines with another column count are
+ * skipped.
+ *
+ * @param fd           descriptor the values are written to
+ * @param write_bytes  incremented by the result of every write()
+ * @param offset       running byte offset into the value area
+ * @param elements_num incremented once per stored entry
+ */
+void Pinyin4cppTrie::LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dict_stream(SINGLE_WORD_PINYIN_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, ":");
+        if (fields.size() != 3) {
+            continue;
+        }
+        const string &value = fields[1];
+        const string &key = fields[2];
+        this->Update(key.c_str(), key.size(), offset);
+        offset += (value.size() + 1);   // +1 for the terminating '\0'
+        ++elements_num;
+        write_bytes += write(fd, value.c_str(), value.size() + 1);
+    }
+}
+
+/**
+ * Reads WORDS_PINYIN_PATH line by line. Each usable line has exactly two
+ * ':'-separated columns: the first is registered as the trie key at the
+ * current offset, the second is appended to @p fd including its '\0'.
+ * Blank lines, lines starting with '#' and lines with another column count are
+ * skipped.
+ *
+ * @param fd           descriptor the values are written to
+ * @param write_bytes  incremented by the result of every write()
+ * @param offset       running byte offset into the value area
+ * @param elements_num incremented once per stored entry
+ */
+void Pinyin4cppTrie::LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
+{
+    ifstream dict_stream(WORDS_PINYIN_PATH);
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (row.empty() || limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, ":");
+        if (fields.size() != 2) {
+            continue;
+        }
+        const string &key = fields[0];
+        const string &value = fields[1];
+        this->Update(key.c_str(), key.size(), offset);
+        offset += (value.size() + 1);   // +1 for the terminating '\0'
+        ++elements_num;
+        write_bytes += write(fd, value.c_str(), value.size() + 1);
+    }
+}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef PINYIN4CPPTRIE_H
+#define PINYIN4CPPTRIE_H
+
+#include "storage-base.hpp"
+
+// Absolute paths of the two dictionary text files that Pinyin4cppTrie reads:
+// per-character pinyin and per-word pinyin.
+const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt";
+const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt";
+
+/**
+ * Pinyin lookup table built on StorageBase<char, false, CacheFileHeaderBase>.
+ * Keys are dictionary entries; the value stored for a key is a '\0'-terminated
+ * string read from the dictionary files.
+ */
+class Pinyin4cppTrie : public StorageBase<char, false, CacheFileHeaderBase>
+{
+public:
+    // Uses SINGLE_WORD_PINYIN_PATH and WORDS_PINYIN_PATH as source files.
+    Pinyin4cppTrie(string dat_cache_path = "");
+    // Uses the caller-supplied source file list.
+    Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path = "");
+    // Writes the cache file dat_cache_file, stamping md5 into its header.
+    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
+    // Returns the value stored for key, or an empty string if there is none.
+    string Find(const string &key);
+    // True when Find(word) is non-empty.
+    bool Contains(string &word);
+    // Decides from the presence of ',' in the value stored for word.
+    bool IsMultiTone(const string &word);
+
+private:
+    // Helpers of LoadSourceFile(): append dictionary values to fd and register
+    // the matching keys, updating the running counters passed by reference.
+    void LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
+    void LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
+};
+
+#endif // PINYIN4CPPTRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
@ -0,0 +1,15 @@
+# qmake include file for the pinyin4cpp sources.
+# $$PWD is the directory containing this .pri file.
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/pinyin4cpp-trie.h \
+    $$PWD/pinyin4cpp_dataTrie.h \
+    $$PWD/pinyin4cpp_dictTrie.h
+
+SOURCES += \
+    $$PWD/pinyin4cpp-trie.cpp \
+    $$PWD/pinyin4cpp_dataTrie.cpp \
+    $$PWD/pinyin4cpp_dictTrie.cpp
+
+# NOTE(review): unlike the entries above these paths do not use $$PWD, so they
+# presumably resolve relative to the including .pro file - confirm.
+DISTFILES += \
+    pinyin4cpp/dict/wordsPinyin.txt \
+    pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include "pinyin4cpp_dataTrie.h"
+
+// Nothing to set up here: every data member has an in-class initializer.
+Pinyin4cppDataTrie::Pinyin4cppDataTrie() = default;
+
+/**
+ * Releases the cache-file mapping and its descriptor.
+ *
+ * Both are released only if they were actually acquired: an instance that
+ * never attached a cache file still holds nullptr / -1, and a failed mmap()
+ * leaves MAP_FAILED in m_mmapAddr. Passing those values to munmap()/close()
+ * is an error.
+ */
+Pinyin4cppDataTrie::~Pinyin4cppDataTrie()
+{
+    if (m_mmapAddr != nullptr && m_mmapAddr != reinterpret_cast<char *>(MAP_FAILED)) {
+        munmap(m_mmapAddr, m_mmapLength);
+    }
+    m_mmapAddr = nullptr;
+
+    if (m_mmapFd >= 0) {
+        close(m_mmapFd);
+    }
+    m_mmapFd = -1;
+}
+
+/**
+ * Exact-match lookup of @p key in the double array.
+ * @return the '\0'-terminated value at the matched offset of the element area,
+ *         or an empty string when nothing matched or the offset lies outside
+ *         [0, m_elementsSize).
+ */
+string Pinyin4cppDataTrie::Find(const string &key) const {
+    // Lookup through the darts-clone interface.
+    Darts::DoubleArray::result_pair_type match;
+    m_DoubleArrayDataTrie.exactMatchSearch(key.c_str(), match);
+
+    const bool no_match = (0 == match.length);
+    const bool bad_offset = (match.value < 0) || ((size_t)match.value >= m_elementsSize); //todo
+    if (no_match || bad_offset) {
+        return string();
+    }
+    return string(&m_elementsPtr[match.value]);
+
+//  Lookup through the cedarpp interface (kept for reference):
+//    int result = m_DoubleArrayDataTrie.exactMatchSearch<int>(key.c_str(), key.size());
+//    if (result < 0)
+//        return string();
+//    return string(&m_elementsPtr[result]);
+
+}
+
+/**
+ * Writes a cache file from @p elements with BuildDatCache(), then attaches it
+ * with InitAttachDat().
+ * @return the result of InitAttachDat().
+ */
+bool Pinyin4cppDataTrie::InitBuildDat(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
+    this->BuildDatCache(elements, dat_cache_file, md5);
+    const bool attached = this->InitAttachDat(dat_cache_file, md5);
+    return attached;
+}
+
+/**
+ * Maps an existing cache file read-only and points the double array at it.
+ *
+ * The file is accepted only if it is at least as large as a CacheFileHeader,
+ * its header digest equals @p md5, and its total size matches the sizes
+ * recorded in the header. On any failure the descriptor and mapping opened by
+ * this call are released and the object's members are left untouched. The
+ * previous code returned false on a digest mismatch while keeping both open,
+ * so the next attach overwrote and leaked them.
+ *
+ * @param dat_cache_file path of the cache file
+ * @param md5            expected digest; its length must equal
+ *                       sizeof(CacheFileHeader::md5_hex)
+ * @return true when the cache was attached, false when it is missing,
+ *         truncated, stale or otherwise unusable.
+ */
+bool Pinyin4cppDataTrie::InitAttachDat(const string &dat_cache_file, const string &md5) {
+    const int fd = open(dat_cache_file.c_str(), O_RDONLY);
+    if (fd < 0) {
+        return false;
+    }
+
+    const auto seek_off = lseek(fd, 0, SEEK_END);
+    if (seek_off < 0 || static_cast<size_t>(seek_off) < sizeof(CacheFileHeader)) {
+        // Unreadable or too short to hold a header (an empty file cannot be mapped).
+        close(fd);
+        return false;
+    }
+
+    const size_t length = static_cast<size_t>(seek_off);
+    void * const addr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
+    if (MAP_FAILED == addr) {
+        close(fd);
+        return false;
+    }
+
+    char * const base = reinterpret_cast<char *>(addr);
+    const CacheFileHeader & header = *reinterpret_cast<const CacheFileHeader *>(base);
+    assert(sizeof(header.md5_hex) == md5.size());
+
+    const bool digest_ok = (sizeof(header.md5_hex) == md5.size())
+                           && (0 == memcmp(&header.md5_hex[0], md5.c_str(), md5.size()));
+    const size_t expected_length = sizeof(CacheFileHeader) + header.elements_size
+                                   + header.dat_size * m_DoubleArrayDataTrie.unit_size();
+    if (!digest_ok || length != expected_length) {
+        munmap(addr, length);
+        close(fd);
+        return false;
+    }
+
+    // Everything checked out: commit the mapping to the object.
+    m_mmapFd = fd;
+    m_mmapLength = length;
+    m_mmapAddr = base;
+    m_elementsNum = header.elements_num;
+    m_elementsSize = header.elements_size;
+
+    // The value area follows the header; the double array follows the values.
+    m_elementsPtr = (const char *)(m_mmapAddr + sizeof(CacheFileHeader));
+    const char * dat_ptr = m_mmapAddr + sizeof(CacheFileHeader) + header.elements_size;
+    m_DoubleArrayDataTrie.set_array((char *)dat_ptr, header.dat_size);
+    return true;
+}
+
+/**
+ * Builds the double array from @p elements and writes the cache file.
+ *
+ * Written in order: a CacheFileHeader, every value of @p elements as a
+ * '\0'-terminated string (in map iteration order), then the double-array data.
+ * The value registered for each key is the byte offset of its string inside
+ * that value area. Output goes to a mkstemp() temporary that is renamed onto
+ * @p dat_cache_file at the end.
+ *
+ * All error checks are assert()s.
+ *
+ * @param elements       key -> value map; every value must be non-empty
+ * @param dat_cache_file destination path of the cache file
+ * @param md5            digest copied into the header; its length must equal
+ *                       sizeof(header.md5_hex)
+ */
+void Pinyin4cppDataTrie::BuildDatCache(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
+    vector<const char*> keys_ptr_vec;
+    vector<int> values_vec;
+    vector<string> mem_elem_vec;
+
+    keys_ptr_vec.reserve(elements.size());
+    values_vec.reserve(elements.size());
+    mem_elem_vec.reserve(elements.size());
+
+    CacheFileHeader header;
+    assert(sizeof(header.md5_hex) == md5.size());
+    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
+
+    int offset(0);
+    for (auto &info:elements) {
+        keys_ptr_vec.push_back(info.first.c_str());
+        values_vec.push_back(offset);
+        offset += (info.second.size() + 1);// +1 accounts for the '\0' stored after the string
+        assert(info.second.size() > 0);
+        mem_elem_vec.push_back(info.second);
+    }
+
+    auto const ret = m_DoubleArrayDataTrie.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
+    assert(0 == ret);
+    header.elements_num = mem_elem_vec.size();
+    header.elements_size = offset;
+    header.dat_size = m_DoubleArrayDataTrie.size();
+
+    // Write to a temporary first so the final file appears via a single rename().
+    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
+    umask(S_IWGRP | S_IWOTH);
+    const int fd =mkstemp((char *)tmp_filepath.data());
+    assert(fd >= 0);
+    fchmod(fd, 0644);
+
+    auto write_bytes = write(fd, (const char *)&header, sizeof(header));
+    for (size_t i = 0; i < elements.size(); ++i) {
+        write_bytes += write(fd, mem_elem_vec[i].c_str(), mem_elem_vec[i].size() + 1);
+    }
+    write_bytes += write(fd, m_DoubleArrayDataTrie.array(), m_DoubleArrayDataTrie.total_size());
+
+    assert((size_t)write_bytes == sizeof(header) + offset + m_DoubleArrayDataTrie.total_size());
+    close(fd);
+
+    const auto rename_ret = rename(tmp_filepath.c_str(), dat_cache_file.c_str());
+    assert(0 == rename_ret);
+}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+#ifndef PINYIN4cpp_DATATRIE_H
+#define PINYIN4cpp_DATATRIE_H
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <QDebug>
+#include "Md5.hpp"
+#include "LocalVector.hpp"
+#include "StringUtil.hpp"
+//#define USE_REDUCED_TRIE
+#include "../storage-base/cedar/cedar.h"
+#include "../storage-base/darts-clone/darts.h"
+
+using namespace std;
+using std::pair;
+
+// Header found at the start of a cache file.
+struct CacheFileHeader { //todo byte alignment
+    char md5_hex[32] = {};        // digest of the source dictionaries (no '\0' terminator stored)
+    uint32_t elements_num = 0;    // number of stored values
+    uint32_t elements_size = 0;   // size in bytes of the value area that follows the header
+    uint32_t dat_size = 0;        // size() of the double array stored after the value area
+};
+
+/**
+ * Key -> string lookup backed by a Darts double array and a cache file mapped
+ * into memory. Copying is disabled by the private, declared-only copy members.
+ */
+class Pinyin4cppDataTrie {
+public:
+    Pinyin4cppDataTrie();
+    ~Pinyin4cppDataTrie();
+
+    // Returns the value stored for key, or an empty string when there is none.
+    string Find(const string & key) const;
+
+    // Writes a cache file from elements, then attaches it.
+    bool InitBuildDat(map<string, string>& elements, const string & dat_cache_file, const string & md5);
+
+    // Attaches an existing cache file; returns false if it cannot be opened or
+    // its digest differs from md5.
+    bool InitAttachDat(const string & dat_cache_file, const string & md5);
+
+private:
+    // Builds the double array from elements and writes it to dat_cache_file.
+    void BuildDatCache(map<string, string>& elements, const string & dat_cache_file, const string & md5);
+
+    // Declared but not copyable from outside the class.
+    Pinyin4cppDataTrie(const Pinyin4cppDataTrie &);
+    Pinyin4cppDataTrie &operator=(const Pinyin4cppDataTrie &);
+
+private:
+    Darts::DoubleArray m_DoubleArrayDataTrie;
+    //cedar::da<int, -1, -2, true> m_DoubleArrayDataTrie;
+    const char * m_elementsPtr = nullptr;   // start of the value area inside the mapping
+    size_t m_elementsNum = 0;               // elements_num read from the header
+    size_t m_elementsSize = 0;              // elements_size read from the header
+    size_t m_mmapLength = 0;                // length of the mapping in bytes
+
+    int    m_mmapFd = -1;                   // descriptor of the mapped cache file
+    char * m_mmapAddr = nullptr;            // base address of the mapping
+};
+
+#endif //PINYIN4cpp_DATATRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2022, KylinSoft Co., Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
+ *
+ */
+
+#include "pinyin4cpp_dictTrie.h"
+#include "malloc.h"
+
+// Constructs the dictionary by forwarding all three paths to Init().
+Pinyin4cppDictTrie::Pinyin4cppDictTrie(const string &single_word_dict_path,
+                                       const string &words_dict_paths,
+                                       const string &dat_cache_path)
+{
+    this->Init(single_word_dict_path, words_dict_paths, dat_cache_path);
+}
+
+// Returns whatever the underlying data trie has stored for word.
+string Pinyin4cppDictTrie::Find(const string &word) const
+{
+    return this->m_DataTrie.Find(word);
+}
+
+// True when the data trie returns a non-empty value for word.
+bool Pinyin4cppDictTrie::Contains(string &word) {
+    return !m_DataTrie.Find(word).empty();
+}
+
+/**
+ * @brief Reports whether the value stored for @p word lists more than one reading.
+ *
+ * LoadSingleWordDict() joins the alternative readings of a character with ',',
+ * so a comma in the stored value marks a polyphone. The previous
+ * implementation had the test inverted and returned true for every word
+ * WITHOUT a comma, unknown words included.
+ *
+ * @param word key to look up
+ * @return true if the stored value contains ','; false otherwise, including
+ *         when the lookup returns an empty string.
+ * NOTE(review): LoadWordsDict() joins alternatives with '/' - confirm whether
+ * that separator should be detected here as well.
+ */
+bool Pinyin4cppDictTrie::IsMultiTone(const string &word) {
+    const string result = m_DataTrie.Find(word);
+    return result.find(',') != string::npos;
+}
+
+// Returns the summed dictionary file size that Init() recorded.
+size_t Pinyin4cppDictTrie::GetTotalDictSize() const
+{
+    return this->m_TotalDictSize_;
+}
+
+/**
+ * Attaches the cache file for the given dictionaries, building it first when
+ * no usable cache exists.
+ *
+ * @param single_word_dict_path per-character dictionary file
+ * @param words_dict_paths      per-word dictionary file
+ * @param dat_cache_path        cache file location; when empty a file under
+ *                              /tmp named after the dictionaries' MD5 is used
+ */
+void Pinyin4cppDictTrie::Init(const string &single_word_dict_path, const string &words_dict_paths, string dat_cache_path) {
+    size_t total_bytes = 0;
+    const string digest = CalcFileListMD5(single_word_dict_path + "|" + words_dict_paths, total_bytes);
+    m_TotalDictSize_ = total_bytes;
+
+    // Without an explicit cache location the data file is kept under /tmp.
+    if (dat_cache_path.empty()) {
+        dat_cache_path = "/tmp/" + digest + ".dat_cache";
+    }
+    qDebug() << "#####Pinyin Dict path:" << dat_cache_path.c_str();
+
+    const bool cache_usable = m_DataTrie.InitAttachDat(dat_cache_path, digest);
+    if (!cache_usable) {
+        // Parse both dictionaries into m_StaticNodeInfos and build the cache from it.
+        LoadSingleWordDict(single_word_dict_path);
+        LoadWordsDict(words_dict_paths);
+        bool build_ret = m_DataTrie.InitBuildDat(m_StaticNodeInfos, dat_cache_path, digest);
+        assert(build_ret);
+        // The parsed entries are no longer needed once the cache is attached.
+        m_StaticNodeInfos.clear();
+        malloc_trim(0);
+    }
+}
+
+/**
+ * Parses the per-character dictionary into m_StaticNodeInfos.
+ *
+ * Each data line has SINGLE_WORD_DICT_COLUMN_NUM ':'-separated columns; the
+ * third is the key and the second its pinyin. When a key occurs again with a
+ * pinyin not yet recorded, that pinyin is appended after a ','.
+ *
+ * Fixes over the previous version:
+ *  - the append used the key (buf[2]) instead of the new pinyin (buf[1]);
+ *  - blank lines are skipped, and malformed lines are skipped when assert() is
+ *    compiled out instead of indexing past the end of the split result.
+ *
+ * @param filePath path of the dictionary file
+ */
+void Pinyin4cppDictTrie::LoadSingleWordDict(const string &filePath) {
+    ifstream ifs(filePath.c_str());
+    string line;
+    vector<string> buf;
+
+    for (; getline(ifs, line);) {
+        if (line.empty() || limonp::StartsWith(line, "#")) {
+            continue;
+        }
+        limonp::Split(line, buf, ":");
+        assert(buf.size() == SINGLE_WORD_DICT_COLUMN_NUM);
+        if (buf.size() != SINGLE_WORD_DICT_COLUMN_NUM || buf.size() < 3) {
+            continue;   // only reachable under NDEBUG
+        }
+
+        const string &pinyin = buf[1];
+        const string &key = buf[2];
+
+        auto known = m_StaticNodeInfos.find(key);
+        if (known == m_StaticNodeInfos.end()) {
+            m_StaticNodeInfos[key] = pinyin;
+            continue;
+        }
+
+        // Key already present: add this pinyin unless it is already recorded.
+        vector<string> recorded;
+        bool isfind(false);
+        limonp::Split(known->second, recorded, ",");
+        for (auto &onePinyin : recorded) {
+            if (onePinyin == pinyin) {
+                isfind = true;
+                break;
+            }
+        }
+        if (!isfind) {
+            known->second += ("," + pinyin);
+        }
+    }
+}
+
+/**
+ * Parses the per-word dictionary into m_StaticNodeInfos.
+ *
+ * Each data line has WORDS_DICT_COLUMN_NUM ':'-separated columns; the first is
+ * the key and the second its pinyin. When a key occurs again with a pinyin not
+ * yet recorded, that pinyin is appended after a '/'. Lines starting with '#'
+ * are skipped.
+ *
+ * @param filePath path of the dictionary file
+ */
+void Pinyin4cppDictTrie::LoadWordsDict(const string &filePath) {
+    ifstream dict_stream(filePath.c_str());
+    vector<string> fields;
+    string row;
+
+    while (getline(dict_stream, row)) {
+        if (limonp::StartsWith(row, "#")) {
+            continue;
+        }
+        limonp::Split(row, fields, ":");
+        assert(fields.size() == WORDS_DICT_COLUMN_NUM);
+
+        auto known = m_StaticNodeInfos.find(fields[0]);
+        if (known == m_StaticNodeInfos.end()) {
+            m_StaticNodeInfos[fields[0]] = fields[1];
+            continue;
+        }
+
+        // Key already present: look for this pinyin among the recorded ones.
+        vector<string> recorded;
+        limonp::Split(known->second, recorded, "/");
+        bool already_recorded = false;
+        for (auto &candidate : recorded) {
+            if (candidate == fields[1]) {
+                already_recorded = true;
+                break;
+            }
+        }
+        if (!already_recorded) {
+            known->second += ("/" + fields[1]);
+        }
+    }
+}
+
+/**
+ * Computes one MD5 digest over the contents of several files.
+ *
+ * @param files_list    file paths separated by '|' or ';'
+ * @param file_size_sum set to the total number of bytes hashed
+ * @return the digest characters produced by limonp::MD5
+ *
+ * Files that cannot be opened, and files whose size is not positive, do not
+ * contribute to the digest or to the size sum.
+ */
+string CalcFileListMD5(const string &files_list, size_t &file_size_sum) {
+    limonp::MD5 hasher;
+    file_size_sum = 0;
+
+    const auto paths = limonp::Split(files_list, "|;");
+    for (auto const & path : paths) {
+        const int fd = open(path.c_str(), O_RDONLY);
+        if (fd < 0) {
+            continue;
+        }
+
+        auto const size = lseek(fd, 0, SEEK_END);
+        if (size <= 0) {
+            close(fd);
+            continue;
+        }
+
+        // Map the whole file and feed it to the hash in one go.
+        void * mapping = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+        assert(MAP_FAILED != mapping);
+        hasher.Update(static_cast<unsigned char *>(mapping), size);
+        file_size_sum += size;
+        munmap(mapping, size);
+
+        close(fd);
+    }
+
+    hasher.Final();
+    return string(hasher.digestChars);
+}
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`libsearch/.qm/*.qm usr/share/ukui-search/translations`
				`@ -1 +0,0 @@`
				`Subproject commit 4734827d7c31936f1485e4513316b05cb7c8714f`