117 changed files with 63 additions and 1247775 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,53 @@
+# C++ objects and libs
+*.slo
+*.lo
+*.o
+*.a
+*.la
+*.lai
+*.so
+*.so.*
+*.rc
+*.dll
+*.dylib
+
+# Qt-es
+object_script.*.Release
+object_script.*.Debug
+*_plugin_import.cpp
+/.qmake.cache
+/.qmake.stash
+*.pro.user
+*.pro.user.*
+*.qbs.user
+*.qbs.user.*
+*.moc
+moc_*.cpp
+moc_*.h
+qrc_*.cpp
+ui_*.h
+*.qmlc
+*.jsc
+Makefile*
+*build-*
+*.qm
+*.prl
+
+# Qt unit tests
+target_wrapper.*
+
+# QtCreator
+*.autosave
+
+# QtCreator Qml
+*.qmlproject.user
+*.qmlproject.user.*
+
+# QtCreator CMake
+CMakeLists.txt.user*
+
+# QtCreator 4.8< compilation database 
+compile_commands.json
+
+# QtCreator local machine specific files for imported projects
+*creator.user*
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
+[submodule "libchinese-segmentation"]
+	path = libchinese-segmentation
+	url = https://gitee.com/openkylin/chinese-segmentation.git
--- a/debian/changelog
+++ b/debian/changelog
@ -1,291 +0,0 @@
-ukui-search (4.0.2.3-ok0) yangtze; urgency=medium
-
-  * Issues:https://gitee.com/openkylin/ukui-search/issues/I7E1IK?from=project-issue
-  * 其他改动：
-    - 补充设置页藏文翻译
-    - 解决当搜索UI关闭后未正确停止插件搜索线程的问题
-    - 解决遍历搜索时文件搜索插件长时间cpu占用较高的问题
-    - 解决文件搜索接口在使用索引搜索根目录时无法搜到文件的问题
-  * 其他改动影响域：
-    - 控制面板设置页
-    - 全局搜索应用UI结果列表
-    - 文件搜索功能
-    - 文件搜索接口
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Mon, 19 Jun 2023 14:28:06 +0800
-
-ukui-search (4.0.2.2-ok0) yangtze; urgency=medium
-
-  * Issues:https://gitee.com/openkylin/ukui-search/issues/I72NP7
-  * 其他改动：无
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 07 Jun 2023 15:44:31 +0800
-
-ukui-search (4.0.2.1-ok0) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 修复添加索引目录后索引状态未更新的问题
-    - 搜索接口增加搜索结果通知机制和状态查询功能
-    - 搜索设置页面添加搜索目录时增加对无访问权限目录的过滤弹窗
-    - 优化搜索设置页面弹窗文案
-    - 修复了一个分词库的问题，该问题曾导致搜索或搜索服务在首次启动时偶现崩溃
-  * 其他改动影响域:
-    - 搜索服务接口，搜索设置页面增加搜索目录功能，搜索和搜索服务稳定性提升
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 06 Jun 2023 15:25:44 +0800
-
-ukui-search (4.0.1.5-ok0) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 解决一处潜在的崩溃问题
-    - 修复搜索插件生命周期管理内存泄漏问题
-    - 修复搜索到需要安装的应用时无法正确跳转到软件商店的问题
-    - 修复偶现刚登陆时搜索会卡顿一段时间的问题
-    - 优化黑名单配置后台逻辑
-  * 其他改动影响域:
-    - 应用搜索功能；索引服务稳定性；升级时配置同步
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 11 May 2023 18:00:59 +0800
-
-ukui-search (4.0.1.3-ok0) yangtze; urgency=medium
-
-  * 更新版本号.
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 27 Apr 2023 14:11:36 +0800
-
-ukui-search (4.0.1.2-ok0~0421) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 解决全局搜索中遍历搜索不能搜索到顶层文件夹的问题
-    - 解决索引提示弹窗会挡住模态窗口的问题
-    - 修复了一个问题，该问题曾导致添加曾经删除过的搜索目录时无法正确索引
-  * 其他改动影响域：
-    - 不开索引时的文件夹搜索功能
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 20 Apr 2023 11:40:39 +0800
-
-ukui-search (4.0.1.1-ok0~0419) yangtze; urgency=medium
-
-  * 备注:修复编译问题，重新传包
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 19 Apr 2023 10:53:46 +0800
-
-ukui-search (4.0.1.0-ok0~0418build1) yangtze; urgency=medium
-
-  * 备注:重新传包
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 18 Apr 2023 17:48:00 +0800
-
-ukui-search (4.0.1.0-ok0~0418) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 应用数据服务增加设置应用是否打开过的接口
-    - 应用数据库增加AUTO_START字段
-    - 优化所有进程的日志打印机制
-  * 其他改动影响域：
-    - 开始菜单应用列表显示；
-    - 日志现在被打印到~/.log/ukui-search/目录下
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 18 Apr 2023 17:03:50 +0800
-
-ukui-search (4.0.0.0-ok0~0413) yangtze; urgency=medium
-
-  * 备注：去除libopencv-dev依赖重新传包
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 13 Apr 2023 17:44:34 +0800
-
-ukui-search (4.0.0.0-ok0~0412build1) yangtze; urgency=medium
-
-  * 备注：重新传包
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 12 Apr 2023 18:51:21 +0800
-
-ukui-search (4.0.0.0-ok0~0412) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 优化搜索服务目录管理模块
-  * 其他改动影响域：
-    - 文件索引目录配置功能
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 12 Apr 2023 18:12:32 +0800
-
-ukui-search (3.22.5.2-ok0~0324) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 优化应用数据服务，完善数据库查询和搜索接口实现。
-    - 文件索引服务优化inotify信号处理，解决若干由于信号处理不当导致的索引不正确问题
-    - 完善搜索服务文件搜索接口
-    - 增加UI针对屏幕尺寸变化时的自动更新位置操作
-    - 提升文本内容搜索准确度
-  * 其他改动影响域:
-    - 屏幕旋转时UI位置；应用搜索功能；文件索引服务；文件内容搜索功能。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 24 Mar 2023 13:58:05 +0800
-
-ukui-search (3.22.4.2-ok0~0210) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 解决在控制面板中搜索‘搜索’插件部分设置项无法搜到的问题。
-    - 更新libukui-search的pc文件。
-    - 优化应用搜索功能；修复偶现搜索结果重复问题。
-    - 修复偶现的由于队列处理不当导致的搜索应用崩溃问题。
-    - 修复当通过弹窗打开索引时，控制面板中不显示模糊搜索开关的问题。
-  * 其他改动影响域:
-    - 应用搜索功能；控制面板设置插件；全局搜索应用稳定性。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 10 Feb 2023 11:26:15 +0800
-
-ukui-search (3.22.4.1-ok0~0105) yangtze; urgency=medium
-
-  * Issues:https://gitee.com/openkylin/ukui-search/issues/I64CCY?from=project-issue
-  * 其他改动：
-    - 增加文件监听对unmount信号的处理。
-    - 更新设置搜索接口，解决设置搜索不准确的问题。
-    - 解决在目录发生变动时，控制面板索引目录选择页面未更新的问题。
-    - 解决了一些内存泄露问题。
-    - 解决系统字体变化时UI未能及时更新的问题。
-    - 增加桌面小插件不随系统缩放更改尺寸的特性。
-  * 其他改动影响域：
-    - 全局搜索UI；文件索引功能；设置项搜索功能;桌面搜索插件;控制面板搜索设置页。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 05 Jan 2023 10:43:22 +0800
-
-ukui-search (3.22.3.1-ok0~1222) yangtze; urgency=medium
-
-  * Issues:NULL
-  * 其他改动：
-    - 解决平板模式下搜索结果列表右侧被滚动条遮挡的问题。
-    - 修复反复开关索引导致的索引异常问题（偶现打开索引失败）。
-    - 解决注册文本内容插件命令失效问题。
-    - 搜索服务应用数据库新增跟随版本更新机制。
-    - 更新README.md。
-  * 其他改动影响域：
-    - 全局搜索UI；文件索引功能；应用搜索功能；应用数据服务。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Thu, 22 Dec 2022 11:36:41 +0800
-
-ukui-search (3.22.3.0-ok0~1212) yangtze; urgency=medium
-
-  * Issues: https://gitee.com/openkylin/ukui-search/issues/I658EV?from=project-issue
-            https://gitee.com/openkylin/ukui-search/issues/I657OY?from=project-issue
-            https://gitee.com/openkylin/ukui-search/issues/I657G6?from=project-issue
-  * 其他改动：
-    - 完善汉字转拼音接口逻辑，解决多音字识别不准确的问题。
-    - 增加索引数据库版本号机制，版本号升级时将重建数据库。
-    - 修复UI最佳匹配与结果分类触摸行为不一致的问题。
-    - 增加控制面板插件模糊搜索开关，添加搜索目录功能。
-    - 解决添加和删除索引目录异常的问题。
-    - 解决wayland下失焦后再激活窗口后焦点异常问题。
-    - 增加了一个用于调试的索引状态监控页面，可通过'ukui-search-service -m'打开
-  * 其他改动影响域：
-    - 全局搜索UI；文件索引功能；控制面板插件；拼音搜索功能。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Mon, 12 Dec 2022 15:49:35 +0800
-
-ukui-search (3.22.2.1-ok0~1130) yangtze; urgency=medium
-
-  * Issues: https://gitee.com/openkylin/ukui-search/issues/I63XWF?from=project-issue
-  * 其他改动：无
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 30 Nov 2022 16:44:46 +0800
-
-ukui-search (3.22.2.0-ok0~1129build1) yangtze; urgency=medium
-
-  * 解决编译问题
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 29 Nov 2022 15:15:40 +0800
-
-ukui-search (3.22.2.0-ok0~1129) yangtze; urgency=medium
-
-  * Issues: NULL
-  * 任务号：无
-  * 其他改动：
-    - 增加搜索应用插件动态注册、注销、排序功能.
-    - 优化文件索引操作.
-    - 增加文件内容模糊搜索功能.
-  * 其他改动说明：
-    - 搜索应用插件的注册、注销与排序目前能通过命令行实现。
-    - 取消了文件索引服务的子进程，优化了文件索引的资源占用，索引
-      服务暂停后再启动采取增量更新模式。
-    - 增加了一个gsettings,用于设置文本内容的搜索模式。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 29 Nov 2022 14:47:34 +0800
-
-ukui-search (3.22.1.0-ok1~1122) yangtze; urgency=medium
-
-  * 更新版本号
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 11:26:21 +0800
-
-ukui-search (3.22.1.0-ok0~1122build2) yangtze; urgency=medium
-
-  * 更新版本号
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 11:06:40 +0800
-
-ukui-search (3.22.1.0-ok0~1122build1) yangtze; urgency=medium
-
-  * 重新传包
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 10:50:53 +0800
-
-ukui-search (3.22.1.0-ok0~1122) yangtze; urgency=medium
-
-  * Issues: https://gitee.com/openkylin/ukui-search/issues/I5XA17
-  * 任务号：无
-  * 其他改动：
-    - 修复结果列表文字显示不居中的问题.
-    - 搜索插件接口增加一个反向调用接口，应用搜索增加截图打开时隐藏主页面动作。
-    - 解决平板模式下，单击结果项直接执行打开动作，无法打开详情页的问题。
-    - Optimize the constructor and pluginUi function of search plugin for ukcc.
-    - 修复内容搜索详情页加载图片失败导致的崩溃问题.
-    - 修复控制面板搜索插件崩溃的问题.
-    - Fixed: the app-data-service crashes when the destructor is executed.
-    - 同步分词模块<更新分词基础词库内容，分词准确性提升>.
-  * 其他改动影响域：搜索UI；文本内容索引分词效果。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Tue, 22 Nov 2022 10:29:34 +0800
-
-ukui-search (3.22.0.0-ok0~1021) yangtze; urgency=medium
-
-  * BUG: 无
-  * 任务号：无
-  * 其他改动：同步主线代码，增加平板桌面小插件功能
-  * 其他改动影响域：全局搜索自身，平板桌面小插件
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Fri, 21 Oct 2022 11:13:06 +0800
-
-ukui-search (3.14.0.6-ok2~0720) yangtze; urgency=medium
-
-  * BUG: 无
-  * 任务号：无
-  * 其他改动：使用kysdk接口，在wayland环境设置跳过任务栏和多任务视图显示；
-              Fix bug: The background does not follow the theme to change.
-  * 其他改动影响域：搜索UI显示。
-
- -- zhangpengfei <zhangpengfei@kylinos.cn>  Wed, 20 Jul 2022 15:04:40 +0800
-
-ukui-search (3.14.0.6-ok2~0716) yangtze; urgency=medium
-
-  * build for openKylin
-  * 适配应用管理器接口
-  * 使用kysdk接口以解决wayland环境下窗口位置异常问题双标题栏问题.
-  * 修改dbus参数由qurl改为qstring
-  * Add a mainWindow switch dbus interface.
-  * sync #125632 action label's color doesn't change with the highlight color.
-  * sync #127327 adjust the highlight effect of ukcc plugin's switchbutton
-  * 处理富文本超长问题
-  * 优化关键词高亮方案，结果项显示效果适配主题框架。
-  * 修复标题栏展开置顶后，点击收回位置错误问题
-  * 增加正则表达式转义，避免偶现的搜索UI卡死现象。
-  * close-cd #127732 #127792 the ukui-search do not quit while losing focus.
-
- -- Xie Wei <xiewei@kylinos.cn>  Sat, 18 Jun 2022 10:43:24 +0800
-
--- a/debian/control
+++ b/debian/control
@ -1,118 +0,0 @@
-Source: ukui-search
-Section: utils
-Priority: optional
-Maintainer: zhangpengfei <zhangpengfei@kylinos.cn>
-Build-Depends: debhelper-compat (=12),
-               pkgconf,
-               libgsettings-qt-dev,
-               qtbase5-dev,
-               qt5-qmake,
-               qtchooser,
-               qtscript5-dev,
-               qttools5-dev-tools,
-               libxapian-dev,
-               libquazip5-dev(>=0.7.6-6build1),
-               libglib2.0-dev,
-               libkf5windowsystem-dev,
-               libqt5x11extras5-dev,
-               libuchardet-dev,
-               libpoppler-qt5-dev,
-               libukui-log4qt-dev,
-               libqt5xdg-dev,
-               libukcc-dev,
-               libtesseract-dev,
-               libleptonica-dev,
-               libkysdk-waylandhelper-dev,
-               libkysdk-qtwidgets-dev,
-               libukui-appwidget-manager-dev,
-               libukui-appwidget-provider-dev,
-               libukui-appwidget-qmlplugin0,
-               qml-module-org-ukui-stylehelper,
-               qtdeclarative5-dev
-Standards-Version: 4.5.0
-Rules-Requires-Root: no
-Homepage: https://www.ukui.org/
-Vcs-Git:  https://gitee.com/openkylin/ukui-search.git
-Vcs-Browser: https://gitee.com/openkylin/ukui-search
-
-Package: ukui-search
-Architecture: any
-Depends: ${misc:Depends},
-         ${shlibs:Depends},
-         libukui-search2 (= ${binary:Version}),
-         ukui-search-service (= ${binary:Version}),
-Description: A user-wide desktop search
- feature of UKUI desktop environment.
-
-Package: ukui-search-service
-Architecture: any
-Depends: ${misc:Depends},
-         ${shlibs:Depends},
-Replaces: libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
-Breaks: libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
-Description: Data service for search function in UKUI desktop environment.
-
-Package: libchinese-segmentation1
-Section: libs
-Architecture: any
-Depends: ${misc:Depends},
-         ${shlibs:Depends},
-         libchinese-segmentation-common (= ${source:Version}),
-Description: Libraries for chinese-segmentation
- .
- This package contains a runtime library needed by 
- ukui-search's file index function.
-
-Package: libchinese-segmentation-common
-Architecture: any
-Depends: ${misc:Depends},
-Replaces: libchinese-segmentation0,libchinese-segmentation1 (<=4.0.1.1-ok0~0419)
-Breaks: libchinese-segmentation0,libchinese-segmentation1 (<=4.0.1.1-ok0~0419)
-Description: Extra files for chinese-segmentation
- .
- This package contains dicts used by chinese-segmentation.
-
-Package: libchinese-segmentation-dev
-Section: libdevel
-Architecture: any
-Depends: ${misc:Depends},
-         libchinese-segmentation1 (= ${binary:Version}),
-Description: Libraries for chinese-segmentation(development files).
-
-Package: libukui-search2
-Section: libs
-Architecture: any
-Depends: ${misc:Depends},
-         ${shlibs:Depends},
-         libchinese-segmentation1 (= ${binary:Version}),
-         libukui-search-common (= ${source:Version}),
-         ukui-search-systemdbus (= ${binary:Version})
-Provides: libukui-search,
-Description: Libraries for ukui-search. 
- .
- This package contains a runtime library needed by
- ukui-search and its extensions.
-
-Package: libukui-search-common
-Architecture: any
-Depends: ${misc:Depends},
-Replaces: ukui-search (<= 3.14.0.6-ok2~0720),libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
-Breaks: ukui-search (<= 3.14.0.6-ok2~0720),libukui-search0,libukui-search2 (<=4.0.1.1-ok0~0419)
-Description: Extra files for libukui-search
- .
- This package contains some extra files for libukui-search,
- for now, translation files only.
-
-Package: libukui-search-dev
-Section: libdevel
-Architecture: any
-Depends: ${misc:Depends},
-         libukui-search2 (= ${binary:Version}),
-Description: Libraries for ukui-search(development files).
-
-Package: ukui-search-systemdbus
-Architecture: any
-Depends: ${shlibs:Depends}, 
-         ${misc:Depends},
-Description: System bus interface to permanently modify the
- max_user_watches value.
--- a/debian/copyright
+++ b/debian/copyright
@ -1,67 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-
-Files: *
-Copyright: 2020, KylinSoft Co., Ltd.
-License: GPL-3+
-
-Files: libchinese-segmentation/cppjieba/limonp/Md5.hpp
-Copyright: 1991, 1992, RSA Data Security, Inc. Created 1991
-License: NTP
-
-Files: src/singleapplication/*
-Copyright: 2013, Digia Plc and/or its subsidiary(-ies)
-License: BSD-3-clause
-
-Files: ukuisearch-systemdbus/*
-Copyright: 2019, Tianjin KYLIN Information Technology Co., Ltd.
-License: GPL-2+
-
-License: BSD-3-clause
- This software is Copyright (c) 2021 by foo.
- This is free software, licensed under:
- The (three-clause) BSD License
- The BSD License
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution. 
- * Neither the name of foo nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission. 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-License: GPL-2+
- This software is Copyright (c) 2021 by foo.
- This is free software, licensed under:
- The GNU General Public License, Version 2, June 1991
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 dated June, 1991, or (at
- your option) any later version.
- On Debian systems, the complete text of version 2 of the GNU General
- Public License can be found in '/usr/share/common-licenses/GPL-2'.
-
-License: GPL-3+
- This software is Copyright (c) 2021 by foo.
- This is free software, licensed under:
- The GNU General Public License, Version 3, June 2007
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 3 dated June, 2007, or (at
- your option) any later version.
- On Debian systems, the complete text of version 3 of the GNU General
- Public License can be found in '/usr/share/common-licenses/GPL-3'.
--- a/debian/libchinese-segmentation-common.install
+++ b/debian/libchinese-segmentation-common.install
@ -1,2 +0,0 @@
-/usr/share/ukui-search/res/dict/*.utf8
-/usr/share/ukui-search/res/dict/*.txt
--- a/debian/libchinese-segmentation-dev.install
+++ b/debian/libchinese-segmentation-dev.install
@ -1,3 +0,0 @@
-usr/include/chinese-seg/*
-usr/lib/*/pkgconfig/chinese-segmentation.pc
-usr/lib/*/libchinese-segmentation.so
--- a/debian/libchinese-segmentation1.install
+++ b/debian/libchinese-segmentation1.install
@ -1 +0,0 @@
-usr/lib/*/libchinese-segmentation.so.*
--- a/debian/libukui-search-common.install
+++ b/debian/libukui-search-common.install
@ -1 +0,0 @@
-libsearch/.qm/*.qm usr/share/ukui-search/translations
--- a/debian/libukui-search-dev.install
+++ b/debian/libukui-search-dev.install
@ -1,3 +0,0 @@
-usr/include/ukui-search/*
-usr/lib/*/pkgconfig/ukui-search.pc
-usr/lib/*/libukui-search.so
--- a/debian/libukui-search2.install
+++ b/debian/libukui-search2.install
@ -1 +0,0 @@
-usr/lib/*/libukui-search.so.*
--- a/debian/rules
+++ b/debian/rules
@ -1,25 +0,0 @@
-#!/usr/bin/make -f
-# See debhelper(7) (uncomment to enable)
-# output every command that modifies files on the build system.
-#export DH_VERBOSE = 1
-
-
-# see FEATURE AREAS in dpkg-buildflags(1)
-# export QT_SELECT=5
-
-# see ENVIRONMENT in dpkg-buildflags(1)
-# package maintainers to append CFLAGS
-#export DEB_CFLAGS_MAINT_APPEND  = -Wall -pedantic
-# package maintainers to append LDFLAGS
-#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
-
-
-%:
-	dh $@ 
-
-
-# dh_make generated override targets
-# This is example for Cmake (See https://bugs.debian.org/641051 )
-#override_dh_auto_configure:
-#	dh_auto_configure -- #	-DCMAKE_LIBRARY_PATH=$(DEB_HOST_MULTIARCH)
-
--- a/debian/source/format
+++ b/debian/source/format
@ -1 +0,0 @@
-3.0 (quilt)
--- a/debian/ukui-search-service.install
+++ b/debian/ukui-search-service.install
@ -1,9 +0,0 @@
-usr/bin/ukui-search-service
-usr/bin/ukui-search-app-data-service
-usr/bin/ukui-search-service-dir-manager
-etc/xdg/autostart/ukui-search-service-dir-manager.desktop
-etc/xdg/autostart/ukui-search-app-data-service.desktop
-etc/xdg/autostart/ukui-search-service.desktop
-usr/share/dbus-1/services/com.ukui.search.appdb.service
-usr/share/dbus-1/services/com.ukui.search.fileindex.service
-usr/share/glib-2.0/schemas/org.ukui.search.data.gschema.xml
--- a/debian/ukui-search-systemdbus.install
+++ b/debian/ukui-search-systemdbus.install
@ -1,3 +0,0 @@
-/usr/share/dbus-1/system-services/com.ukui.search.qt.systemdbus.service
-/etc/dbus-1/system.d/com.ukui.search.qt.systemdbus.conf
-/usr/bin/ukui-search-systemdbus
--- a/debian/ukui-search.install
+++ b/debian/ukui-search.install
@ -1,16 +0,0 @@
-usr/bin/ukui-search
-etc/xdg/autostart/ukui-search.desktop
-usr/share/applications/ukui-search-menu.desktop
-frontend/.qm/zh_CN.qm usr/share/ukui-search/translations
-frontend/.qm/bo_CN.qm usr/share/ukui-search/translations
-frontend/.qm/tr.qm usr/share/ukui-search/translations
-usr/share/glib-2.0/schemas/org.ukui.log4qt.ukui-search.gschema.xml
-usr/lib/*/ukui-control-center/*
-usr/share/ukui-search/search-ukcc-plugin/translations/*
-search-ukcc-plugin/.qm/*.qm usr/share/ukui-search/search-ukcc-plugin/translations
-usr/share/ukui-search/search-ukcc-plugin/image/*
-
-usr/share/dbus-1/services/org.ukui.appwidget.provider.search.service
-/usr/share/appwidget/*
-frontend/.qm/search_bo_CN.qm usr/share/appwidget/translations
-frontend/.qm/search_zh_CN.qm usr/share/appwidget/translations
--- a/1
+++ b/1
@ -0,0 +1 @@
+Subproject commit 4734827d7c31936f1485e4513316b05cb7c8714f
--- a/libchinese-segmentation/LICENSE
+++ b/libchinese-segmentation/LICENSE
@ -1,674 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- a/libchinese-segmentation/README.md
+++ b/libchinese-segmentation/README.md
@ -1,170 +0,0 @@
-# chinese-segmentation
-
-#### 介绍
-libchinese-segmentation工程以单例的形式分别提供了中文分词、汉字转拼音和中文繁体简体转换功能。
-
-接口文件分别为:
-chinese-segmentation.h
-libchinese-segmentation_global.h
-common-struct.h
-
-hanzi-to-pinyin.h
-pinyin4cpp-common.h
-
-Traditional-to-Simplified.h
-安装路径:/usr/include/chinese-seg
-
-#### 使用说明
-
-其中中文分词相关功能由chinese-segmentation.h提供接口，主要包括以下功能函数：
-
-```
-   static ChineseSegmentation *getInstance();//全局单例
-     /**
-     * @brief ChineseSegmentation::callSegment
-     * 调用extractor进行关键词提取，先使用Mix方式初步分词，再使用Idf词典进行关键词提取，只包含两字以上关键词
-     *
-     * @param sentence 要提取关键词的句子
-     * @return vector<KeyWord> 存放提取后关键词的信息的容器
-     */
-    vector<KeyWord> callSegment(const string &sentence);
-    vector<KeyWord> callSegment(QString &sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMixSegmentCutStr
-     * 使用Mix方法进行分词，即先使用最大概率法MP初步分词，再用隐式马尔科夫模型HMM进一步分词，可以准确切出词典已有词和未登录词，结果比较准确
-     *
-     * @param sentence 要分词的句子
-     * @return vector<string> 只存放分词后每个词的内容的容器
-     */
-    vector<string> callMixSegmentCutStr(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMixSegmentCutWord
-     * 和callMixSegmentCutStr功能相同
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callMixSegmentCutWord(const string& str);
-
-    /**
-     * @brief ChineseSegmentation::lookUpTagOfWord
-     * 查询word的词性
-     * @param word 要查询词性的词
-     * @return string word的词性
-     */
-    string lookUpTagOfWord(const string& word);
-
-    /**
-     * @brief ChineseSegmentation::getTagOfWordsInSentence
-     * 使用Mix分词后获取每个词的词性
-     * @param sentence 要分词的句子
-     * @return vector<pair<string, string>> 分词后的每个词的内容(first)和其对应的词性(second)
-     */
-    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
-
-    /**
-     * @brief ChineseSegmentation::callFullSegment
-     * 使用Full进行分词，Full会切出字典里所有的词。
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callFullSegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callQuerySegment
-     * 使用Query进行分词，即先使用Mix，对于长词再用Full，结果最精确，但词的数量也最大
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callQuerySegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callHMMSegment
-     * 使用隐式马尔科夫模型HMM进行分词
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callHMMSegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMPSegment
-     * 使用最大概率法MP进行分词
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callMPSegment(const string& sentence);
-
-```
-
-汉字转拼音相关功能由hanzi-to-pinyin.h提供接口，主要包括以下功能函数：
-
-```
-    static HanZiToPinYin * getInstance();//全局单例
-
-    /**
-     * @brief HanZiToPinYin::isMultiTone 判断是否为多音字/词/句
-     * @param word 要判断的字/词/句
-     * @return bool 不是返回false
-     */
-    bool isMultiTone(string &word);
-    bool isMultiTone(string &&word);
-    bool isMultiTone(const string &word);
-    bool isMultiTone(const string &&word);
-
-    /**
-     * @brief HanZiToPinYin::contains 查询某个字/词/句是否有拼音（是否在数据库包含）
-     * @param word 要查询的字/词/句
-     * @return bool 数据库不包含返回false
-     */
-    bool contains(string &word);
-
-    /**
-     * @brief HanZiToPinYin::getResults 获取某个字/词/句的拼音
-     * @param word 要获取拼音的字/词/句
-     * @param results word的拼音列表（有可能多音字），每次调用results会被清空
-     * @return int 获取到返回0，否则返回-1
-     */
-    int getResults(string word, QStringList &results);
-
-    /**
-     * @brief setConfig 设置HanZiToPinYin的各项功能，详见pinyin4cpp-common.h
-     * @param dataStyle 返回数据风格，默认default
-     * @param segType 是否启用分词，默认启用
-     * @param polyphoneType 是否启用多音字，默认不启用
-     * @param processType 无拼音数据处理模式，默认default
-     */
-    void setConfig(PinyinDataStyle dataStyle,SegType segType,PolyphoneType polyphoneType,ExDataProcessType processType);
-
-```
-
-中文繁体转简体相关功能由Traditional-to-Simplified.h提供接口，主要包括以下功能函数：
-
-```
-    static Traditional2Simplified * getInstance();//全局单例
-    /**
-     * @brief Traditional2Simplified::isTraditional 判断是否为繁体字，是则返回true
-     * @param oneWord 要判断的字
-     * @return bool 不是返回false
-     */
-    bool isTraditional(string &oneWord);
-
-    /**
-     * @brief Traditional2Simplified::getResults 转换某个字/词/句的繁体字
-     * @param words 要转换为简体中文的字/词/句
-     * @return words 的简体中文结果
-     */
-    string getResults(string words);
-
-```
-
-除此之外工程中提供了测试程序位于chinese-segmentation/test，运行界面如下：
-![输入图片说明](https://foruda.gitee.com/images/1682048388802220746/245a2ec3_8021248.png "image.png")
-
-#### 参与贡献
-
-1.  Fork 本仓库
-2.  新建分支
-3.  提交代码
-4.  新建 Pull Request
-
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri
@ -1,10 +0,0 @@
-INCLUDEPATH += $$PWD
-
-HEADERS += \
-    $$PWD/Traditional2Simplified_trie.h
-
-SOURCES += \
-    $$PWD/Traditional2Simplified_trie.cpp
-
-DISTFILES += \
-    Traditional-Chinese-Simplified-conversion/dict/TraditionalChineseSimplifiedDict.txt 
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.cpp
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.cpp
@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2023, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#include "Traditional2Simplified_trie.h"
-
-Traditional2SimplifiedTrie::Traditional2SimplifiedTrie(string dat_cache_path) // Constructs the trie using the built-in dictionary path as its only source file.
-    : StorageBase<char, false, CacheFileHeaderBase>(vector<string>{TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH}, dat_cache_path) // Base gets a one-element source list plus the caller's cache path.
-{
-    this->Init(); // Init() is not defined in this file; presumably inherited from StorageBase — confirm.
-}
-
-Traditional2SimplifiedTrie::Traditional2SimplifiedTrie(const vector<string> file_paths, string dat_cache_path) // Constructs the trie with a caller-supplied list of source files.
-    : StorageBase<char, false, CacheFileHeaderBase>(file_paths, dat_cache_path) // Both arguments are forwarded unchanged to the base class.
-{
-    this->Init(); // Init() is not defined in this file; presumably inherited from StorageBase — confirm.
-}
-
-bool Traditional2SimplifiedTrie::IsTraditional(const string &word) { // Returns true when Find(word) yields a non-empty string, false otherwise.
-    string result = this->Find(word); // Find() returns an empty string when the key has no exact match.
-    if (!result.empty())
-        return true;
-    return false;
-}
-
-void Traditional2SimplifiedTrie::LoadSourceFile(const string &dat_cache_file, const string &md5) // Writes header, dictionary values and trie array to a temp file, then hands it to tryRename() with dat_cache_file as the target.
-{
-    CacheFileHeaderBase header;
-    assert(sizeof(header.md5_hex) == md5.size()); // md5 must be exactly as long as the header's md5_hex field.
-    memcpy(&header.md5_hex[0], md5.c_str(), md5.size()); // Store the caller's md5 in the header.
-
-    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
-    string tmp_filepath = string(dat_cache_file) + "_XXXXXX"; // mkstemp() template: the trailing XXXXXX is replaced in place.
-    umask(S_IWGRP | S_IWOTH); // NOTE(review): changes the process umask and does not restore the previous value here.
-    const int fd =mkstemp((char *)tmp_filepath.data()); // Cast lets mkstemp() rewrite the string's buffer with the generated name.
-    assert(fd >= 0); // NOTE(review): compiled out under NDEBUG, so a failed mkstemp() would go undetected.
-    fchmod(fd, 0644);
-
-    write_bytes = write(fd, (const char *)&header, sizeof(CacheFileHeaderBase)); // Header goes first; the bytes after md5_hex are rewritten below.
-
-    this->LoadDict(fd, write_bytes, offset, elements_num); // Appends the dictionary values and updates all three counters.
-
-    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize()); // Trie array follows the value data.
-
-    lseek(fd, sizeof(header.md5_hex), SEEK_SET); // Seek back to just past the md5_hex bytes at the start of the file.
-    write(fd, &elements_num, sizeof(int)); // NOTE(review): the return values of these three writes are ignored.
-    write(fd, &offset, sizeof(int));
-    data_trie_size = this->GetDataTrieSize();
-    write(fd, &data_trie_size, sizeof(int));
-
-    close(fd);
-    assert((size_t)write_bytes == sizeof(CacheFileHeaderBase) + offset + this->GetDataTrieTotalSize()); // Sanity check: header + value bytes + trie bytes were all written.
-
-    tryRename(tmp_filepath, dat_cache_file); // tryRename() is defined elsewhere; presumably moves the temp file onto the final path — confirm.
-}
-
-string Traditional2SimplifiedTrie::Find(const string &key) // Returns the value stored for an exact match of key, or an empty string when there is none.
-{
-    int result = this->ExactMatchSearch(key.c_str(), key.size()); // A negative result is treated as "not found".
-    if (result < 0)
-        return string();
-    return string(&this->GetElementPtr()[result]); // result indexes the element buffer; presumably a NUL-terminated value starts there (LoadDict writes a trailing NUL) — confirm.
-}
-
-void Traditional2SimplifiedTrie::LoadDict(const int &fd, int &write_bytes, int &offset, int &elements_num) // Reads "key:value" lines, passes each key to Update() and appends each value to fd.
-{
-    ifstream ifs(TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH); // NOTE(review): always opens the default path, even if the constructor was given other file_paths.
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) { // Skip comment lines and blank lines.
-            continue;
-        }
-        limonp::Split(line, buf, ":");
-        if (buf.size() != 2) // Lines that do not split into exactly two fields are ignored.
-            continue;
-        this->Update(buf[0].c_str(), buf[0].size(), offset); // Passes the key with the current offset; Update() is defined elsewhere.
-        offset += (buf[1].size() + 1); // +1 accounts for the NUL terminator written below.
-        elements_num++;
-        write_bytes += write(fd, buf[1].c_str(), buf[1].size() + 1); // Writes the value including its NUL terminator.
-    }
-}
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.h
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/Traditional2Simplified_trie.h
@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2023, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#ifndef Traditional2SimplifiedTrie_H
-#define Traditional2SimplifiedTrie_H
-
-#include "storage-base.hpp"
-
-const char * const  TRADITIONAL_CHINESE_SIMPLIFIED_DICT_PATH = "/usr/share/ukui-search/res/dict/TraditionalChineseSimplifiedDict.txt";
-
-class Traditional2SimplifiedTrie : public StorageBase<char, false, CacheFileHeaderBase>
-{
-public:
-    Traditional2SimplifiedTrie(string dat_cache_path = "");
-    Traditional2SimplifiedTrie(const vector<string> file_paths, string dat_cache_path = "");
-    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
-    string Find(const string &key);
-    bool IsTraditional(const string &word);
-
-private:
-    void LoadDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
-};
-
-#endif // Traditional2SimplifiedTrie_H
--- a/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/dict/TraditionalChineseSimplifiedDict.txt
+++ b/libchinese-segmentation/Traditional-Chinese-Simplified-conversion/dict/TraditionalChineseSimplifiedDict.txt
--- a/libchinese-segmentation/Traditional-to-Simplified-private.h
+++ b/libchinese-segmentation/Traditional-to-Simplified-private.h
@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2023, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#ifndef Traditional2SimplifiedPRIVATE_H
-#define Traditional2SimplifiedPRIVATE_H
-
-#include <QtCore/qglobal.h>
-#include <QHash>
-#include "Traditional-to-Simplified.h"
-#include "Traditional2Simplified_trie.h"
-
-using namespace std;
-
-class TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Traditional2SimplifiedPrivate
-{
-public:
-    Traditional2SimplifiedPrivate(Traditional2Simplified *parent = nullptr);
-    ~Traditional2SimplifiedPrivate();
-
-public:
-    bool isTraditional(string &word) {return m_Traditional2SimplifiedTrie.IsTraditional(word);}
-
-    string getResults(string words);
-
-private:
-
-    Traditional2Simplified *q = nullptr;
-    Traditional2SimplifiedTrie m_Traditional2SimplifiedTrie;
-};
-#endif // Traditional2SimplifiedPRIVATE_H
--- a/libchinese-segmentation/Traditional-to-Simplified.cpp
+++ b/libchinese-segmentation/Traditional-to-Simplified.cpp
@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2023, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#include <mutex>
-#include <cctype>
-#include "Traditional-to-Simplified.h"
-#include "Traditional-to-Simplified-private.h"
-#include "cppjieba/Unicode.hpp"
-
-Traditional2Simplified * Traditional2Simplified::g_Traditional2SimplifiedManager = nullptr;
-std::once_flag g_Traditional2SimplifiedSingleFlag;
-
-string Traditional2SimplifiedPrivate::getResults(string words)
-{
-    string results;
-    if (words.empty()) {
-        return words;
-    } else if (cppjieba::IsSingleWord(words)) {//单个字符
-        results = m_Traditional2SimplifiedTrie.Find(words);
-        if (results.empty()) {
-            results = words;//原数据返回
-        }
-    } else {//多个字符
-        string oneWord;
-        string data;
-        cppjieba::RuneStrArray runeArray;
-        cppjieba::DecodeRunesInString(words, runeArray);
-        for (auto i = runeArray.begin(); i != runeArray.end(); ++i) {
-            oneWord = cppjieba::GetStringFromRunes(words, i, i);
-            data = m_Traditional2SimplifiedTrie.Find(oneWord);
-            if (data.empty()) {//单字无结果
-                results.append(oneWord);
-            } else {
-                results.append(data);
-            }
-        }
-    }
-    return results;
-}
-
-Traditional2SimplifiedPrivate::Traditional2SimplifiedPrivate(Traditional2Simplified *parent) : q(parent)
-{
-}
-
-Traditional2SimplifiedPrivate::~Traditional2SimplifiedPrivate()
-{
-}
-
-Traditional2Simplified * Traditional2Simplified::getInstance()
-{
-    call_once(g_Traditional2SimplifiedSingleFlag, []() {
-        g_Traditional2SimplifiedManager = new Traditional2Simplified;
-    });
-    return g_Traditional2SimplifiedManager;
-}
-
-bool Traditional2Simplified::isTraditional(string &oneWord)
-{
-    return d->isTraditional(oneWord);
-}
-
-string Traditional2Simplified::getResults(string words)
-{
-    return d->getResults(words);
-}
-
-Traditional2Simplified::Traditional2Simplified() : d(new Traditional2SimplifiedPrivate)
-{
-}
--- a/libchinese-segmentation/Traditional-to-Simplified.h
+++ b/libchinese-segmentation/Traditional-to-Simplified.h
@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2023, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#ifndef Traditional2Simplified_H
-#define Traditional2Simplified_H
-
-#include <QtCore/qglobal.h>
-#include <string>
-#define TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Q_DECL_IMPORT
-
-using namespace std;
-
-class Traditional2SimplifiedPrivate;
-class TRADITIONAL_CHINESE_SIMPLIFIED_EXPORT Traditional2Simplified
-{
-public:
-    static Traditional2Simplified * getInstance();
-
-public:
-    /**
-     * @brief Traditional2Simplified::isMultiTone 判断是否为繁体字，是则返回true
-     * @param oneWord 要判断的字
-     * @return bool 不是返回false
-     */
-    bool isTraditional(string &oneWord);
-
-    /**
-     * @brief Traditional2Simplified::getResults 转换某个字/词/句的繁体字
-     * @param words 要转换为简体中文的字/词/句
-     * @return words 的简体中文结果
-     */
-    string getResults(string words);
-
-protected:
-    Traditional2Simplified();
-    ~Traditional2Simplified();
-    Traditional2Simplified(const Traditional2Simplified&) = delete;
-    Traditional2Simplified& operator =(const Traditional2Simplified&) = delete;
-private:
-    static Traditional2Simplified *g_Traditional2SimplifiedManager;
-    Traditional2SimplifiedPrivate *d = nullptr;
-};
-
-#endif // PINYINMANAGER_H
--- a/libchinese-segmentation/chinese-segmentation-private.h
+++ b/libchinese-segmentation/chinese-segmentation-private.h
@ -1,34 +0,0 @@
-#ifndef CHINESESEGMENTATIONPRIVATE_H
-#define CHINESESEGMENTATIONPRIVATE_H
-
-#include "chinese-segmentation.h"
-#include "cppjieba/Jieba.hpp"
-#include "cppjieba/KeywordExtractor.hpp"
-
-class ChineseSegmentationPrivate
-{
-public:
-    explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
-    ~ChineseSegmentationPrivate();
-    vector<KeyWord> callSegment(const string& sentence);
-    vector<KeyWord> callSegment(QString& sentence);
-
-    vector<string> callMixSegmentCutStr(const string& sentence);
-    vector<Word> callMixSegmentCutWord(const string& sentence);
-    string lookUpTagOfWord(const string& word);
-    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
-
-    vector<Word> callFullSegment(const string& sentence);
-
-    vector<Word> callQuerySegment(const string& sentence);
-
-    vector<Word> callHMMSegment(const string& sentence);
-
-    vector<Word> callMPSegment(const string& sentence);
-
-private:
-    cppjieba::Jieba *m_jieba;
-    ChineseSegmentation *q = nullptr;
-};
-
-#endif // CHINESESEGMENTATIONPRIVATE_H
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@ -1,178 +0,0 @@
-/*
- * Copyright (C) 2020, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: zhangzihao <zhangzihao@kylinos.cn>
- * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
- *
- */
-#include "chinese-segmentation.h"
-#include "chinese-segmentation-private.h"
-
-ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
-{
-    //const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
-    const char * const  HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
-    //const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
-    //const char * const  IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
-    const char * const  STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
-    m_jieba = new cppjieba::Jieba(DICT_PATH,
-                                  HMM_PATH,
-                                  USER_DICT_PATH,
-                                  IDF_DICT_PATH,
-                                  STOP_WORD_PATH,
-                                  "");
-}
-
-ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
-    if(m_jieba)
-        delete m_jieba;
-    m_jieba = nullptr;
-}
-
-vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
-    const size_t topk = -1;
-    vector<KeyWord> keywordres;
-    ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence, keywordres, topk);
-
-    return keywordres;
-
-}
-
-vector<KeyWord> ChineseSegmentationPrivate::callSegment(QString &sentence) {
-    //'\xEF\xBC\x8C' is "，" "\xE3\x80\x82" is "。"  use three " " to replace ,to ensure the offset info.
-    sentence = sentence.replace("\t", " ").replace("\xEF\xBC\x8C", "   ").replace("\xE3\x80\x82", "   ");
-    const size_t topk = -1;
-    vector<KeyWord> keywordres;
-    ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence.left(20480000).toStdString(), keywordres, topk);
-
-    return keywordres;
-
-}
-
-vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
-{
-    vector<string> keywordres;
-    ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
-    return keywordres;
-}
-
-vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
-{
-    vector<Word> keywordres;
-    ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
-    return keywordres;
-}
-
-string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
-{
-    return ChineseSegmentationPrivate::m_jieba->LookupTag(word);
-}
-
-vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
-{
-     vector<pair<string, string>> words;
-     ChineseSegmentationPrivate::m_jieba->Tag(sentence, words);
-     return words;
-}
-
-vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
-{
-    vector<Word> keywordres;
-    ChineseSegmentationPrivate::m_jieba->CutAll(sentence, keywordres);
-    return keywordres;
-}
-
-vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
-{
-    vector<Word> keywordres;
-    ChineseSegmentationPrivate::m_jieba->CutForSearch(sentence, keywordres);
-    return keywordres;
-}
-
-vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
-{
-    vector<Word> keywordres;
-    ChineseSegmentationPrivate::m_jieba->CutHMM(sentence, keywordres);
-    return keywordres;
-}
-
-vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
-{
-    size_t maxWordLen = 512;
-    vector<Word> keywordres;
-    ChineseSegmentationPrivate::m_jieba->CutSmall(sentence, keywordres, maxWordLen);
-    return keywordres;
-}
-
-ChineseSegmentation *ChineseSegmentation::getInstance()
-{
-    static ChineseSegmentation *global_instance_chinese_segmentation = new ChineseSegmentation;
-    return global_instance_chinese_segmentation;
-}
-
-vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
-{
-    return d->callSegment(sentence);
-}
-
-vector<KeyWord> ChineseSegmentation::callSegment(QString &sentence)
-{
-    return d->callSegment(sentence);
-}
-
-vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
-{
-    return d->callMixSegmentCutStr(sentence);
-}
-
-vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
-{
-    return d->callMixSegmentCutWord(str);
-}
-
-string ChineseSegmentation::lookUpTagOfWord(const string &word)
-{
-    return d->lookUpTagOfWord(word);
-}
-
-vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
-{
-    return d->getTagOfWordsInSentence(sentence);
-}
-
-vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
-{
-    return d->callFullSegment(sentence);
-}
-
-vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
-{
-    return d->callQuerySegment(sentence);
-}
-
-vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
-{
-    return d->callHMMSegment(sentence);
-}
-
-vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
-{
-    return d->callMPSegment(sentence);
-}
-
-ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
-{
-}
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2020, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: zhangzihao <zhangzihao@kylinos.cn>
- * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
- *
- */
-#ifndef CHINESESEGMENTATION_H
-#define CHINESESEGMENTATION_H
-
-#include <QString>
-#include "libchinese-segmentation_global.h"
-#include "common-struct.h"
-
-class ChineseSegmentationPrivate;
-class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
-public:
-    static ChineseSegmentation *getInstance();
-
-    /**
-     * @brief ChineseSegmentation::callSegment
-     * 调用extractor进行关键词提取，先使用Mix方式初步分词，再使用Idf词典进行关键词提取，只包含两字以上关键词
-     *
-     * @param sentence 要提取关键词的句子
-     * @return vector<KeyWord> 存放提取后关键词的信息的容器
-     */
-    vector<KeyWord> callSegment(const string &sentence);
-    vector<KeyWord> callSegment(QString &sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMixSegmentCutStr
-     * 使用Mix方法进行分词，即先使用最大概率法MP初步分词，再用隐式马尔科夫模型HMM进一步分词，可以准确切出词典已有词和未登录词，结果比较准确
-     *
-     * @param sentence 要分词的句子
-     * @return vector<string> 只存放分词后每个词的内容的容器
-     */
-    vector<string> callMixSegmentCutStr(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMixSegmentCutWord
-     * 和callMixSegmentCutStr功能相同
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callMixSegmentCutWord(const string& str);
-
-    /**
-     * @brief ChineseSegmentation::lookUpTagOfWord
-     * 查询word的词性
-     * @param word 要查询词性的词
-     * @return string word的词性
-     */
-    string lookUpTagOfWord(const string& word);
-
-    /**
-     * @brief ChineseSegmentation::getTagOfWordsInSentence
-     * 使用Mix分词后获取每个词的词性
-     * @param sentence 要分词的句子
-     * @return vector<pair<string, string>> 分词后的每个词的内容(firsr)和其对应的词性(second)
-     */
-    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
-
-    /**
-     * @brief ChineseSegmentation::callFullSegment
-     * 使用Full进行分词，Full会切出字典里所有的词。
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callFullSegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callQuerySegment
-     * 使用Query进行分词，即先使用Mix，对于长词再用Full，结果最精确，但词的数量也最大
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callQuerySegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callHMMSegment
-     * 使用隐式马尔科夫模型HMM进行分词
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callHMMSegment(const string& sentence);
-
-    /**
-     * @brief ChineseSegmentation::callMPSegment
-     * 使用最大概率法MP进行分词
-     * @param sentence 要分词的句子
-     * @return vector<Word> 存放分词后每个词所有信息的容器
-     */
-    vector<Word> callMPSegment(const string& sentence);
-
-private:
-    explicit ChineseSegmentation();
-    ~ChineseSegmentation() = default;
-    ChineseSegmentation(const ChineseSegmentation&) = delete;
-    ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
-
-private:
-    ChineseSegmentationPrivate *d = nullptr;
-};
-
-#endif // CHINESESEGMENTATION_H
--- a/libchinese-segmentation/common-struct.h
+++ b/libchinese-segmentation/common-struct.h
@ -1,52 +0,0 @@
-#ifndef COMMONSTRUCT_H
-#define COMMONSTRUCT_H
-
-#include <string>
-#include <vector>
-
-using namespace std;
-
-/**
- * @brief The KeyWord struct
- *
- * @property word the content of keyword
- * @property offsets the Unicode offsets, can be used to check the word pos in a sentence
- * @property weight the weight of the keyword
- */
-
-struct KeyWord {
-    string word;
-    vector<size_t> offsets;
-    double weight;
-    ~KeyWord() {
-        word = std::move("");
-        offsets.clear();
-        offsets.shrink_to_fit();
-    }
-};
-
-/**
- * @brief The Word struct
- *
- * @property word the content of word
- * @property offset the offset of the word(absolute pos, Chinese 3 , English 1)， can be used to check the word pos in a sentence
- * @property unicode_offset the Unicode offset of the word
- * @property unicode_length the Unicode length of the word
- */
-struct Word {
-    string word;
-    uint32_t offset;
-    uint32_t unicode_offset;
-    uint32_t unicode_length;
-    Word(const string& w, uint32_t o)
-        : word(w), offset(o) {
-    }
-    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
-        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
-    }
-    ~Word() {
-        word = std::move("");
-    }
-}; // struct Word
-
-#endif // COMMONSTRUCT_H
--- a/libchinese-segmentation/cppjieba/DatTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DatTrie.hpp
@ -1,641 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <QDebug>
-
-#include <algorithm>
-#include <utility>
-
-#include "limonp/Md5.hpp"
-#include "Unicode.hpp"
-//#define USE_DARTS_CLONE
-#ifdef USE_DARTS_CLONE
-#include "../storage-base/darts-clone/darts.h"
-#else
-#include "../storage-base/cedar/cedar.h"
-#endif
-
-namespace cppjieba {
-
-using std::pair;
-
-struct DatElement {
-    string word;
-    string tag;
-    double weight = 0;
-
-    bool operator < (const DatElement & b) const {
-        if (word == b.word) {
-            return this->weight > b.weight;
-        }
-
-        return this->word < b.word;
-    }
-};
-
-struct IdfElement {
-    string word;
-    double idf = 0;
-
-    bool operator < (const IdfElement & b) const {
-        if (word == b.word) {
-            return this->idf > b.idf;
-        }
-
-        return this->word < b.word;
-    }
-};
-
-struct PinYinElement
-{
-    string word;
-    string tag;
-
-    bool operator < (const DatElement & b) const {
-        return this->word < b.word;
-    }
-};
-
-inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
-    return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
-}
-
-struct PinYinMemElem {
-    char tag[6] = {};
-
-    void SetTag(const string & str) {
-        memset(&tag[0], 0, sizeof(tag));
-        strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
-    }
-
-    string GetTag() const {
-        return &tag[0];
-    }
-};
-
-inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
-    return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
-}
-#ifdef USE_DARTS_CLONE
-typedef Darts::DoubleArray JiebaDAT;
-#else
-typedef cedar::da<int, -1, -2, false> JiebaDAT;
-#endif
-
-
-struct CacheFileHeader {
-    char md5_hex[32] = {};
-    double min_weight = 0;
-    uint32_t elements_num = 0;
-    uint32_t dat_size = 0;
-};
-
-static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
-static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
-
-
-class DatTrie {
-public:
-    DatTrie() {}
-    ~DatTrie() {
-        ::munmap(mmap_addr_, mmap_length_);
-        mmap_addr_ = nullptr;
-        mmap_length_ = 0;
-
-        ::close(mmap_fd_);
-        mmap_fd_ = -1;
-    }
-
-    const DatMemElem * Find(const string & key) const {
-#ifdef USE_DARTS_CLONE
-        JiebaDAT::result_pair_type find_result;
-        dat_.exactMatchSearch(key.c_str(), find_result);
-
-        if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
-            return nullptr;
-        }
-
-        return &elements_ptr_[ find_result.value ];
-#else
-        int result = dat_.exactMatchSearch<int>(key.c_str());
-        if (result < 0)
-            return nullptr;
-        return &elements_ptr_[result];
-#endif
-    }
-
-    const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
-#ifdef USE_DARTS_CLONE
-        JiebaDAT::result_pair_type find_result;
-        dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
-
-        if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
-            return -1;
-        }
-
-        return idf_elements_ptr_[ find_result.value ];
-#else
-        int result = dat_.exactMatchSearch<int>(key.c_str(), length, node_pos);
-        if (result < 0)
-            return -1;
-        return idf_elements_ptr_[result];
-#endif
-    }
-
-    const PinYinMemElem * PinYinFind(const string & key) const {
-#ifdef USE_DARTS_CLONE
-        JiebaDAT::result_pair_type find_result;
-        dat_.exactMatchSearch(key.c_str(), find_result);
-
-        if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
-            return nullptr;
-        }
-
-        return &pinyin_elements_ptr_[ find_result.value ];
-#else
-        int result = dat_.exactMatchSearch<int>(key.c_str());
-        if (result < 0)
-            return nullptr;
-        return &pinyin_elements_ptr_[result];
-#endif
-    }
-
-    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
-              vector<struct DatDag>&res, size_t max_word_len) const {
-
-        res.clear();
-        res.resize(end - begin);
-
-        string text_str;
-        EncodeRunesToString(begin, end, text_str);
-
-        static const size_t max_num = 128;
-        JiebaDAT::result_pair_type result_pairs[max_num] = {};
-
-        for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
-
-            std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
-
-            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
-
-            for (std::size_t idx = 0; idx < num_results; ++idx) {
-                auto & match = result_pairs[idx];
-
-                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
-                    continue;
-                }
-
-                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
-
-                if (char_num > max_word_len) {
-                    continue;
-                }
-
-                auto pValue = &elements_ptr_[match.value];
-
-                if (1 == char_num) {
-                    res[i].nexts[0].second = pValue;
-                    continue;
-                }
-
-                res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
-            }
-
-            begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
-        }
-    }
-
-    /*
-    void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
-              vector<struct DatDag>&res, size_t max_word_len) const {
-
-        res.clear();
-        res.resize(end - begin);
-
-        string text_str;
-        EncodeRunesToString(begin, end, text_str);
-
-        static const size_t max_num = 128;
-        JiebaDAT::result_pair_type result_pairs[max_num] = {};
-
-        size_t str_size = end - begin;
-        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
-
-            begin_pos -= (end - i - 1)->len;
-            std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
-            res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - i, nullptr));
-
-            for (std::size_t idx = 0; idx < num_results; ++idx) {
-                auto & match = result_pairs[idx];
-                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
-                    continue;
-                }
-
-                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
-
-                if (char_num > max_word_len) {
-                    continue;
-                }
-
-                auto pValue = &elements_ptr_[match.value];
-
-                if (1 == char_num) {
-                    res[str_size - i - 1].nexts[0].second = pValue;
-                    continue;
-                }
-
-                res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - 1 - i + char_num, pValue));
-            }
-        }
-    }*/
-
-    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
-              vector<WordRange>& words, size_t max_word_len) const {
-
-        string text_str;
-        EncodeRunesToString(begin, end, text_str);
-
-        static const size_t max_num = 128;
-        JiebaDAT::result_pair_type result_pairs[max_num] = {};//存放字典查询结果
-        size_t str_size = end - begin;
-        double max_weight[str_size];//存放逆向路径最大weight
-        for (size_t i = 0; i<str_size; i++) {
-            max_weight[i] = -3.14e+100;
-        }
-        int max_next[str_size];//存放动态规划后的分词结果
-        //memset(max_next,-1,str_size);
-
-        double val(0);
-        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
-            size_t nextPos = str_size - i;//逆向计算
-            begin_pos -= (end - i - 1)->len;
-
-            std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
-            if (0 == num_results) {//字典不存在则单独分词
-                val = min_weight_;
-
-                if (nextPos  < str_size) {
-                    val += max_weight[nextPos];
-                }
-                if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
-                    max_weight[nextPos - 1] = val;
-                    max_next[nextPos - 1] = nextPos;
-                }
-            } else {//字典存在则根据查询结果数量计算最大概率路径
-                for (std::size_t idx = 0; idx < num_results; ++idx) {
-                    auto & match = result_pairs[idx];
-                    if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
-                        continue;
-                    }
-                    auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
-                    if (char_num > max_word_len) {
-                        continue;
-                    }
-                    auto pValue = &elements_ptr_[match.value];
-
-                    val = pValue->weight;
-                    if (1 == char_num) {
-                        if (nextPos  < str_size) {
-                            val += max_weight[nextPos];
-                        }
-                        if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
-                            max_weight[nextPos - 1] = val;
-                            max_next[nextPos - 1] = nextPos;
-                        }
-                    } else {
-                        if (nextPos - 1 + char_num  < str_size) {
-                            val += max_weight[nextPos - 1 + char_num];
-                        }
-                        if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) {
-                            max_weight[nextPos - 1] = val;
-                            max_next[nextPos - 1] = nextPos - 1 + char_num;
-                        }
-                    }
-                }
-            }
-        }
-        for (size_t i = 0; i < str_size;) {//统计动态规划结果
-            assert(max_next[i] > i);
-            assert(max_next[i] <= str_size);
-            WordRange wr(begin + i, begin + max_next[i] - 1);
-            words.push_back(wr);
-            i = max_next[i];
-        }
-    }
-    double GetMinWeight() const {
-        return min_weight_;
-    }
-
-    void SetMinWeight(double d) {
-        min_weight_ = d ;
-    }
-
-    bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
-        BuildDatCache(elements, dat_cache_file, md5);
-        return InitAttachDat(dat_cache_file, md5);
-    }
-
-    bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
-        BuildDatCache(elements, dat_cache_file, md5);
-        return InitIdfAttachDat(dat_cache_file, md5);
-    }
-
-    bool InitBuildDat(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
-        BuildDatCache(elements, dat_cache_file, md5);
-        return InitPinYinAttachDat(dat_cache_file, md5);
-    }
-
-    bool InitAttachDat(const string & dat_cache_file, const string & md5) {
-        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
-
-        if (mmap_fd_ < 0) {
-            return false;
-        }
-
-        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
-        assert(seek_off >= 0);
-        mmap_length_ = seek_off;
-
-        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
-        assert(MAP_FAILED != mmap_addr_);
-
-        assert(mmap_length_ >= sizeof(CacheFileHeader));
-        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
-        elements_num_ = header.elements_num;
-        min_weight_ = header.min_weight;
-        assert(sizeof(header.md5_hex) == md5.size());
-
-        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
-            return false;
-        }
-
-        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem)  + header.dat_size * dat_.unit_size());
-        elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
-        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
-        dat_.set_array(dat_ptr, header.dat_size);
-        return true;
-    }
-
-    bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
-        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
-
-        if (mmap_fd_ < 0) {
-            return false;
-        }
-
-        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
-        assert(seek_off >= 0);
-        mmap_length_ = seek_off;
-
-        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
-        assert(MAP_FAILED != mmap_addr_);
-
-        assert(mmap_length_ >= sizeof(CacheFileHeader));
-        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
-        elements_num_ = header.elements_num;
-        min_weight_ = header.min_weight;
-        assert(sizeof(header.md5_hex) == md5.size());
-
-        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
-            return false;
-        }
-
-        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double)  + header.dat_size * dat_.unit_size());
-        idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
-        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
-        dat_.set_array(dat_ptr, header.dat_size);
-        return true;
-    }
-
-    bool InitPinYinAttachDat(const string & dat_cache_file, const string & md5) {
-        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
-
-        if (mmap_fd_ < 0) {
-            return false;
-        }
-
-        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
-        assert(seek_off >= 0);
-        mmap_length_ = seek_off;
-
-        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
-        assert(MAP_FAILED != mmap_addr_);
-
-        assert(mmap_length_ >= sizeof(CacheFileHeader));
-        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
-        elements_num_ = header.elements_num;
-        min_weight_ = header.min_weight;
-        assert(sizeof(header.md5_hex) == md5.size());
-
-        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
-            return false;
-        }
-
-        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(PinYinMemElem)  + header.dat_size * dat_.unit_size());
-        pinyin_elements_ptr_ = (const PinYinMemElem *)(mmap_addr_ + sizeof(header));
-        char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(PinYinMemElem) * elements_num_;
-        dat_.set_array(dat_ptr, header.dat_size);
-        return true;
-    }
-
-private:
-    void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
-        std::sort(elements.begin(), elements.end());
-
-        vector<const char*> keys_ptr_vec;
-        vector<int> values_vec;
-        vector<DatMemElem> mem_elem_vec;
-
-        keys_ptr_vec.reserve(elements.size());
-        values_vec.reserve(elements.size());
-        mem_elem_vec.reserve(elements.size());
-
-        CacheFileHeader header;
-        header.min_weight = min_weight_;
-        assert(sizeof(header.md5_hex) == md5.size());
-        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-        for (size_t i = 0; i < elements.size(); ++i) {
-            keys_ptr_vec.push_back(elements[i].word.data());
-            values_vec.push_back(i);
-            mem_elem_vec.push_back(DatMemElem());
-            auto & mem_elem = mem_elem_vec.back();
-            mem_elem.weight = elements[i].weight;
-            mem_elem.SetTag(elements[i].tag);
-        }
-
-        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
-        assert(0 == ret);
-        header.elements_num = mem_elem_vec.size();
-        header.dat_size = dat_.size();
-
-        {
-            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-            ::umask(S_IWGRP | S_IWOTH);
-            //const int fd =::mkstemp(&tmp_filepath[0]);
-            const int fd =::mkstemp((char *)tmp_filepath.data());
-            qDebug() << "mkstemp :" << errno << tmp_filepath.data();
-            assert(fd >= 0);
-            ::fchmod(fd, 0644);
-
-            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
-            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
-            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
-
-            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
-            ::close(fd);
-
-            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
-            assert(0 == rename_ret);
-        }
-    }
-
-    void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
-        std::sort(elements.begin(), elements.end());
-
-        vector<const char*> keys_ptr_vec;
-        vector<int> values_vec;
-        vector<double> mem_elem_vec;
-
-        keys_ptr_vec.reserve(elements.size());
-        values_vec.reserve(elements.size());
-        mem_elem_vec.reserve(elements.size());
-
-        CacheFileHeader header;
-        header.min_weight = min_weight_;
-        assert(sizeof(header.md5_hex) == md5.size());
-        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-        for (size_t i = 0; i < elements.size(); ++i) {
-            keys_ptr_vec.push_back(elements[i].word.data());
-            values_vec.push_back(i);
-            mem_elem_vec.push_back(elements[i].idf);
-        }
-
-        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
-        assert(0 == ret);
-        header.elements_num = mem_elem_vec.size();
-        header.dat_size = dat_.size();
-
-        {
-            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-            ::umask(S_IWGRP | S_IWOTH);
-            //const int fd =::mkstemp(&tmp_filepath[0]);
-            const int fd =::mkstemp((char *)tmp_filepath.data());
-            qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
-            assert(fd >= 0);
-            ::fchmod(fd, 0644);
-
-            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
-            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size());
-            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
-
-            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size());
-            ::close(fd);
-
-            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
-            assert(0 == rename_ret);
-        }
-    }
-
-    void BuildDatCache(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
-        //std::sort(elements.begin(), elements.end());
-
-        vector<const char*> keys_ptr_vec;
-        vector<int> values_vec;
-        vector<PinYinMemElem> mem_elem_vec;
-
-        keys_ptr_vec.reserve(elements.size());
-        values_vec.reserve(elements.size());
-        mem_elem_vec.reserve(elements.size());
-
-        CacheFileHeader header;
-        header.min_weight = min_weight_;
-        assert(sizeof(header.md5_hex) == md5.size());
-        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-        for (size_t i = 0; i < elements.size(); ++i) {
-            keys_ptr_vec.push_back(elements[i].word.data());
-            values_vec.push_back(i);
-            mem_elem_vec.push_back(PinYinMemElem());
-            auto & mem_elem = mem_elem_vec.back();
-            mem_elem.SetTag(elements[i].tag);
-        }
-
-        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
-        assert(0 == ret);
-        header.elements_num = mem_elem_vec.size();
-        header.dat_size = dat_.size();
-
-        {
-            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-            ::umask(S_IWGRP | S_IWOTH);
-            //const int fd =::mkstemp(&tmp_filepath[0]);
-            const int fd =::mkstemp((char *)tmp_filepath.data());
-            qDebug() << "mkstemp :" << errno << tmp_filepath.data();
-            assert(fd >= 0);
-            ::fchmod(fd, 0644);
-
-            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
-            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
-            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
-
-            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
-            ::close(fd);
-
-            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
-            assert(0 == rename_ret);
-        }
-    }
-
-    DatTrie(const DatTrie &);
-    DatTrie &operator=(const DatTrie &);
-
-private:
-    JiebaDAT dat_;
-    const DatMemElem * elements_ptr_ = nullptr;
-    const double * idf_elements_ptr_ = nullptr;
-    const PinYinMemElem * pinyin_elements_ptr_ = nullptr;
-    size_t elements_num_ = 0;
-    double min_weight_ = 0;
-
-    int mmap_fd_ = -1;
-    size_t mmap_length_ = 0;
-    char * mmap_addr_ = nullptr;
-};
-
-
-inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
-    limonp::MD5 md5;
-
-    const auto files = limonp::Split(files_list, "|;");
-    file_size_sum = 0;
-
-    for (auto const & local_path : files) {
-        const int fd = ::open(local_path.c_str(), O_RDONLY);
-        if( fd < 0){
-            continue;
-        }
-        auto const len = ::lseek(fd, 0, SEEK_END);
-        if (len > 0) {
-            void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
-            assert(MAP_FAILED != addr);
-
-            md5.Update((unsigned char *) addr, len);
-            file_size_sum += len;
-
-            ::munmap(addr, len);
-        }
-        ::close(fd);
-    }
-
-    md5.Final();
-    return string(md5.digestChars);
-}
-
-}
--- a/libchinese-segmentation/cppjieba/DictTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DictTrie.hpp
@ -1,234 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <string>
-#include <cstring>
-#include <cstdlib>
-#include <stdint.h>
-#include <cmath>
-#include <limits>
-#include "limonp/StringUtil.hpp"
-#include "limonp/Logging.hpp"
-#include "Unicode.hpp"
-#include "DatTrie.hpp"
-#include <QDebug>
-namespace cppjieba {
-
-using namespace limonp;
-
-const double MAX_DOUBLE = 3.14e+100;
-const size_t DICT_COLUMN_NUM = 3;
-const char* const UNKNOWN_TAG = "";
-
-class DictTrie {
-public:
-    enum UserWordWeightOption {
-        WordWeightMin,
-        WordWeightMedian,
-        WordWeightMax,
-    }; // enum UserWordWeightOption
-
-    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
-             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
-        Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
-    }
-
-    ~DictTrie() {}
-
-    const DatMemElem* Find(const string & word) const {
-        return dat_.Find(word);
-    }
-
-    void FindDatDag(RuneStrArray::const_iterator begin,
-              RuneStrArray::const_iterator end,
-              vector<struct DatDag>&res,
-              size_t max_word_len = MAX_WORD_LENGTH) const {
-        dat_.Find(begin, end, res, max_word_len);
-    }
-
-    void FindWordRange(RuneStrArray::const_iterator begin,
-              RuneStrArray::const_iterator end,
-              vector<WordRange>& words,
-              size_t max_word_len = MAX_WORD_LENGTH) const {
-        dat_.Find(begin, end, words, max_word_len);
-    }
-
-    bool IsUserDictSingleChineseWord(const Rune& word) const {
-        return IsIn(user_dict_single_chinese_word_, word);
-    }
-
-    double GetMinWeight() const {
-        return dat_.GetMinWeight();
-    }
-
-    size_t GetTotalDictSize() const {
-        return total_dict_size_;
-    }
-
-    void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
-        vector<string> buf;
-        DatElement node_info;
-        Split(line, buf, " ");
-
-        if (buf.size() == 0) {
-            return;
-        }
-
-        node_info.word = buf[0];
-        node_info.weight = user_word_default_weight_;
-        node_info.tag = UNKNOWN_TAG;
-
-        if (buf.size() == 2) {
-            node_info.tag = buf[1];
-        } else if (buf.size() == 3) {
-            if (freq_sum_ > 0.0) {
-                const int freq = atoi(buf[1].c_str());
-                node_info.weight = log(1.0 * freq / freq_sum_);
-                node_info.tag = buf[2];
-            }
-        }
-
-        if (saveNodeInfo) {
-            static_node_infos_.push_back(node_info);
-        }
-
-        if (Utf8CharNum(node_info.word) == 1) {
-            RuneArray word;
-
-            if (DecodeRunesInString(node_info.word, word)) {
-                user_dict_single_chinese_word_.insert(word[0]);
-            } else {
-                XLOG(ERROR) << "Decode " << node_info.word << " failed.";
-            }
-        }
-    }
-
-    void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
-        vector<string> files = limonp::Split(filePaths, "|;");
-
-        for (size_t i = 0; i < files.size(); i++) {
-            ifstream ifs(files[i].c_str());
-            XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
-            string line;
-
-            for (; getline(ifs, line);) {
-                if (line.size() == 0) {
-                    continue;
-                }
-
-                InserUserDictNode(line, saveNodeInfo);
-            }
-        }
-    }
-
-
-private:
-    void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
-              UserWordWeightOption user_word_weight_opt) {
-        const auto dict_list = dict_path + "|" + user_dict_paths;
-        size_t file_size_sum = 0;
-        const string md5 = CalcFileListMD5(dict_list, file_size_sum);
-        total_dict_size_ = file_size_sum;
-
-        if (dat_cache_path.empty()) {
-            dat_cache_path = "/tmp/" + md5 + ".dat_";//未指定词库数据文件存储位置的默认存储在tmp目录下
-        }
-         dat_cache_path += VERSION;
-        QString path = QString::fromStdString(dat_cache_path);
-        qDebug() << "#########Dict path:" << path;
-        if (dat_.InitAttachDat(dat_cache_path, md5)) {
-            LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
-            return;
-        }
-
-        LoadDefaultDict(dict_path);
-        freq_sum_ = CalcFreqSum(static_node_infos_);
-        CalculateWeight(static_node_infos_, freq_sum_);
-        double min_weight = 0;
-        SetStaticWordWeights(user_word_weight_opt, min_weight);
-        dat_.SetMinWeight(min_weight);
-
-        LoadUserDict(user_dict_paths);
-        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
-        assert(build_ret);
-        vector<DatElement>().swap(static_node_infos_);
-    }
-
-    void LoadDefaultDict(const string& filePath) {
-        ifstream ifs(filePath.c_str());
-        XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
-        string line;
-        vector<string> buf;
-
-        for (; getline(ifs, line);) {
-            Split(line, buf, " ");
-            XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
-            DatElement node_info;
-            node_info.word = buf[0];
-            node_info.weight = atof(buf[1].c_str());
-            node_info.tag = buf[2];
-            static_node_infos_.push_back(node_info);
-        }
-    }
-
-    static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
-        return lhs.weight < rhs.weight;
-    }
-
-    void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
-        XCHECK(!static_node_infos_.empty());
-        vector<DatElement> x = static_node_infos_;
-        sort(x.begin(), x.end(), WeightCompare);
-        if(x.empty()){
-            return;
-        }
-        min_weight = x[0].weight;
-        const double max_weight_ = x[x.size() - 1].weight;
-        const double median_weight_ = x[x.size() / 2].weight;
-
-        switch (option) {
-            case WordWeightMin:
-                user_word_default_weight_ = min_weight;
-                break;
-
-            case WordWeightMedian:
-                user_word_default_weight_ = median_weight_;
-                break;
-
-            default:
-                user_word_default_weight_ = max_weight_;
-                break;
-        }
-    }
-
-    double CalcFreqSum(const vector<DatElement>& node_infos) const {
-        double sum = 0.0;
-
-        for (size_t i = 0; i < node_infos.size(); i++) {
-            sum += node_infos[i].weight;
-        }
-
-        return sum;
-    }
-
-    void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
-        for (size_t i = 0; i < node_infos.size(); i++) {
-            DatElement& node_info = node_infos[i];
-            assert(node_info.weight > 0.0);
-            node_info.weight = log(double(node_info.weight) / sum);
-        }
-    }
-
-private:
-    vector<DatElement> static_node_infos_;
-    size_t total_dict_size_ = 0;
-    DatTrie dat_;
-
-    double freq_sum_;
-    double user_word_default_weight_;
-    unordered_set<Rune> user_dict_single_chinese_word_;
-};
-}
-
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@ -1,67 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <set>
-#include <cassert>
-#include "limonp/Logging.hpp"
-#include "segment-trie/segment-trie.h"
-//#include "DictTrie.hpp"
-#include "SegmentBase.hpp"
-#include "Unicode.hpp"
-
-namespace cppjieba {
-class FullSegment: public SegmentBase {
-public:
-    FullSegment(const DictTrie* dictTrie)
-        : dictTrie_(dictTrie) {
-        assert(dictTrie_);
-    }
-    ~FullSegment() { }
-
-    virtual void Cut(RuneStrArray::const_iterator begin,
-                     RuneStrArray::const_iterator end,
-                     vector<WordRange>& res, bool, size_t) const override {
-        assert(dictTrie_);
-        vector<struct DatDag> dags;
-        dictTrie_->FindDatDag(begin, end, dags);
-        size_t max_word_end_pos = 0;
-
-        for (size_t i = 0; i < dags.size(); i++) {
-            for (const auto & kv : dags[i].nexts) {
-                const size_t nextoffset = kv.first - 1;
-                assert(nextoffset < dags.size());
-                const auto wordLen = nextoffset - i + 1;
-                const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i));
-                const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary
-
-                if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) {
-                    WordRange wr(begin + i, begin + nextoffset);
-                    res.push_back(wr);
-                }
-
-                max_word_end_pos = max(max_word_end_pos, nextoffset + 1);
-            }
-        }
-    }
-
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-private:
-    const DictTrie* dictTrie_;
-};
-}
-
--- a/libchinese-segmentation/cppjieba/HMMModel.hpp
+++ b/libchinese-segmentation/cppjieba/HMMModel.hpp
@ -1,158 +0,0 @@
-#pragma once
-
-#include "limonp/StringUtil.hpp"
-//#define USE_CEDAR_SEGMENT //使用cedar初步测试性能损失3%-5%左右，内存占用降低近1M
-#ifdef USE_CEDAR_SEGMENT
-#include "cedar/cedar.h"
-#endif
-namespace cppjieba {
-
-using namespace limonp;
-#ifdef USE_CEDAR_SEGMENT
-typedef cedar::da<float, -1, -2, false> EmitProbMap;
-#else
-typedef unordered_map<Rune, double> EmitProbMap;
-#endif
-struct HMMModel {
-    /*
-     * STATUS:
-     * 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
-     * */
-    enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
-
-    HMMModel(const string& modelPath) {
-        memset(startProb, 0, sizeof(startProb));
-        memset(transProb, 0, sizeof(transProb));
-        statMap[0] = 'B';
-        statMap[1] = 'E';
-        statMap[2] = 'M';
-        statMap[3] = 'S';
-        emitProbVec.push_back(&emitProbB);
-        emitProbVec.push_back(&emitProbE);
-        emitProbVec.push_back(&emitProbM);
-        emitProbVec.push_back(&emitProbS);
-        LoadModel(modelPath);
-    }
-    ~HMMModel() {
-    }
-    void LoadModel(const string& filePath) {
-        ifstream ifile(filePath.c_str());
-        XCHECK(ifile.is_open()) << "open " << filePath << " failed";
-        string line;
-        vector<string> tmp;
-        vector<string> tmp2;
-        //Load startProb
-        XCHECK(GetLine(ifile, line));
-        Split(line, tmp, " ");
-        XCHECK(tmp.size() == STATUS_SUM);
-
-        for (size_t j = 0; j < tmp.size(); j++) {
-            startProb[j] = atof(tmp[j].c_str());
-        }
-
-        //Load transProb
-        for (size_t i = 0; i < STATUS_SUM; i++) {
-            XCHECK(GetLine(ifile, line));
-            Split(line, tmp, " ");
-            XCHECK(tmp.size() == STATUS_SUM);
-
-            for (size_t j = 0; j < tmp.size(); j++) {
-                transProb[i][j] = atof(tmp[j].c_str());
-            }
-        }
-
-        //Load emitProbB
-        XCHECK(GetLine(ifile, line));
-        XCHECK(LoadEmitProb(line, emitProbB));
-
-        //Load emitProbE
-        XCHECK(GetLine(ifile, line));
-        XCHECK(LoadEmitProb(line, emitProbE));
-
-        //Load emitProbM
-        XCHECK(GetLine(ifile, line));
-        XCHECK(LoadEmitProb(line, emitProbM));
-
-        //Load emitProbS
-        XCHECK(GetLine(ifile, line));
-        XCHECK(LoadEmitProb(line, emitProbS));
-    }
-    double GetEmitProb(const EmitProbMap* ptMp, Rune key,
-                       double defVal)const {
-#ifdef USE_CEDAR_SEGMENT
-        char str_key[8];
-        snprintf(str_key, sizeof(str_key), "%d", key);
-        float result = ptMp->exactMatchSearch<float>(str_key);
-        return result < 0 ? defVal : result;
-#else
-        EmitProbMap::const_iterator cit = ptMp->find(key);
-
-        if (cit == ptMp->end()) {
-            return defVal;
-        }
-
-        return cit->second;
-#endif
-    }
-    bool GetLine(ifstream& ifile, string& line) {
-        while (getline(ifile, line)) {
-            Trim(line);
-
-            if (line.empty()) {
-                continue;
-            }
-
-            if (StartsWith(line, "#")) {
-                continue;
-            }
-
-            return true;
-        }
-
-        return false;
-    }
-    bool LoadEmitProb(const string& line, EmitProbMap& mp) {
-        if (line.empty()) {
-            return false;
-        }
-
-        vector<string> tmp, tmp2;
-        RuneArray unicode;
-        Split(line, tmp, ",");
-
-        for (size_t i = 0; i < tmp.size(); i++) {
-            Split(tmp[i], tmp2, ":");
-
-            if (2 != tmp2.size()) {
-                XLOG(ERROR) << "emitProb illegal.";
-                return false;
-            }
-
-            if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
-                XLOG(ERROR) << "TransCode failed.";
-                return false;
-            }
-#ifdef USE_CEDAR_SEGMENT
-            char str_key[8];
-            snprintf(str_key, sizeof(str_key), "%d", unicode[0]);
-            mp.update(str_key, std::strlen(str_key), atof(tmp2[1].c_str()));
-#else
-            mp[unicode[0]] = atof(tmp2[1].c_str());
-#endif
-        }
-
-        return true;
-    }
-
-    char statMap[STATUS_SUM];
-    double startProb[STATUS_SUM];
-    double transProb[STATUS_SUM][STATUS_SUM];
-    EmitProbMap emitProbB;
-    EmitProbMap emitProbE;
-    EmitProbMap emitProbM;
-    EmitProbMap emitProbS;
-    vector<EmitProbMap* > emitProbVec;
-}; // struct HMMModel
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@ -1,206 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <memory.h>
-#include <cassert>
-#include "HMMModel.hpp"
-#include "SegmentBase.hpp"
-
-namespace cppjieba {
-
-const double MIN_DOUBLE = -3.14e+100;
-
-class HMMSegment: public SegmentBase {
-public:
-    HMMSegment(const HMMModel* model)
-        : model_(model) {
-    }
-    ~HMMSegment() { }
-
-    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
-                     size_t) const override {
-        RuneStrArray::const_iterator left = begin;
-        RuneStrArray::const_iterator right = begin;
-
-        while (right != end) {
-            if (right->rune < 0x80) { //asc码
-                if (left != right) {
-                    InternalCut(left, right, res);
-                }
-
-                left = right;
-
-                do {
-                    right = SequentialLetterRule(left, end);//非英文字符则返回left，否则返回left后非英文字母的位置
-
-                    if (right != left) {
-                        break;
-                    }
-
-                    right = NumbersRule(left, end);//非数字则返回left，否则返回left后非数字的位置
-
-                    if (right != left) {
-                        break;
-                    }
-
-                    right ++;
-                } while (false);
-
-                WordRange wr(left, right - 1);
-                res.push_back(wr);
-                left = right;
-            } else {
-                right++;
-            }
-        }
-
-        if (left != right) {
-            InternalCut(left, right, res);
-        }
-    }
-
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-private:
-    // sequential letters rule
-    RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
-                                                      RuneStrArray::const_iterator end) const {
-        Rune x = begin->rune;
-
-        if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
-            begin ++;
-        } else {
-            return begin;
-        }
-
-        while (begin != end) {
-            x = begin->rune;
-
-            if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
-                begin ++;
-            } else {
-                break;
-            }
-        }
-
-        return begin;
-    }
-    //
-    RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
-        Rune x = begin->rune;
-
-        if ('0' <= x && x <= '9') {
-            begin ++;
-        } else {
-            return begin;
-        }
-
-        while (begin != end) {
-            x = begin->rune;
-
-            if (('0' <= x && x <= '9') || x == '.') {
-                begin++;
-            } else {
-                break;
-            }
-        }
-
-        return begin;
-    }
-    void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
-        vector<size_t> status;
-        Viterbi(begin, end, status);
-
-        RuneStrArray::const_iterator left = begin;
-        RuneStrArray::const_iterator right;
-
-        for (size_t i = 0; i < status.size(); i++) {
-            if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
-                right = begin + i + 1;
-                WordRange wr(left, right - 1);
-                res.push_back(wr);
-                left = right;
-            }
-        }
-    }
-
-    void Viterbi(RuneStrArray::const_iterator begin,
-                 RuneStrArray::const_iterator end,
-                 vector<size_t>& status) const {
-        size_t Y = HMMModel::STATUS_SUM;
-        size_t X = end - begin;
-
-        size_t XYSize = X * Y;
-        size_t now, old, stat;
-        double tmp, endE, endS;
-
-        //vector<int> path(XYSize);
-        //vector<double> weight(XYSize);
-        int path[XYSize];
-        double weight[XYSize];
-
-        //start
-        for (size_t y = 0; y < Y; y++) {
-            weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
-            path[0 + y * X] = -1;
-        }
-
-        double emitProb;
-
-        for (size_t x = 1; x < X; x++) {
-            for (size_t y = 0; y < Y; y++) {
-                now = x + y * X;
-                weight[now] = MIN_DOUBLE;
-                path[now] = HMMModel::E; // warning
-                emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
-
-                for (size_t preY = 0; preY < Y; preY++) {
-                    old = x - 1 + preY * X;
-                    tmp = weight[old] + model_->transProb[preY][y] + emitProb;
-
-                    if (tmp > weight[now]) {
-                        weight[now] = tmp;
-                        path[now] = preY;
-                    }
-                }
-            }
-        }
-
-        endE = weight[X - 1 + HMMModel::E * X];
-        endS = weight[X - 1 + HMMModel::S * X];
-        stat = 0;
-
-        if (endE >= endS) {
-            stat = HMMModel::E;
-        } else {
-            stat = HMMModel::S;
-        }
-
-        status.resize(X);
-
-        for (int x = X - 1 ; x >= 0; x--) {
-            status[x] = stat;
-            stat = path[x + stat * X];
-        }
-    }
-
-    const HMMModel* model_;
-}; // class HMMSegment
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/IdfTrie.hpp
+++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp
@ -1,117 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <string>
-#include <cstring>
-#include <cstdlib>
-#include <stdint.h>
-#include <cmath>
-#include <limits>
-#include "limonp/StringUtil.hpp"
-#include "limonp/Logging.hpp"
-#include "Unicode.hpp"
-#include "DatTrie.hpp"
-#include <QDebug>
-namespace cppjieba {
-
-using namespace limonp;
-
-// Number of space-separated fields expected on every line of the IDF
-// dictionary file (the word followed by its IDF value).
-const size_t IDF_COLUMN_NUM = 2;
-
-/**
- * Word -> IDF lookup table stored in a DatTrie.
- *
- * On construction the trie is either attached from a cached dat file or built
- * from the plain-text dictionary at `dict_path`, in which case the cache file
- * is (re)written by DatTrie::InitBuildDat.
- */
-class IdfTrie {
-public:
-    enum UserWordWeightOption {
-        WordWeightMin,
-        WordWeightMedian,
-        WordWeightMax,
-    }; // enum UserWordWeightOption
-
-    /**
-     * @param dict_path             plain-text IDF dictionary, one "word idf" pair per line
-     * @param dat_cache_path        cache file prefix; when empty a path under /tmp is used
-     * @param user_word_weight_opt  accepted for interface compatibility; not used by Init
-     */
-    IdfTrie(const string& dict_path, const string & dat_cache_path = "",
-             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
-        Init(dict_path, dat_cache_path, user_word_weight_opt);
-    }
-
-    ~IdfTrie() {}
-
-    // Forwards the lookup to the underlying DatTrie.
-    double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
-        return dat_.Find(word, length, node_pos);
-    }
-
-    // Size reported by CalcFileListMD5 for the dictionary file(s).
-    size_t GetTotalDictSize() const {
-        return total_dict_size_;
-    }
-
-private:
-    void Init(const string& dict_path, string dat_cache_path,
-              UserWordWeightOption /*user_word_weight_opt*/) {
-        size_t file_size_sum = 0;
-        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
-        total_dict_size_ = file_size_sum;
-
-        if (dat_cache_path.empty()) {
-            // No cache location given: fall back to a file under /tmp.
-            dat_cache_path = "/tmp/" + md5 + ".dat_";
-        }
-        dat_cache_path += VERSION;
-        QString path = QString::fromStdString(dat_cache_path);
-        qDebug() << "#########Idf path:" << path;
-
-        // Fast path: a valid cache exists. idfAverage_ is not recomputed here
-        // and keeps its default value.
-        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
-            return;
-        }
-
-        LoadDefaultIdf(dict_path);
-        const double idf_sum = CalcIdfSum(static_node_infos_);
-        assert(static_node_infos_.size());
-        idfAverage_ = idf_sum / static_node_infos_.size();
-        assert(idfAverage_ > 0.0);
-        double min_weight = 0;
-        dat_.SetMinWeight(min_weight);
-
-        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
-        assert(build_ret);
-        (void)build_ret; // only inspected by the assert; silences NDEBUG builds
-
-        // Release the staging vector's memory once the trie owns the data.
-        vector<IdfElement>().swap(static_node_infos_);
-    }
-
-    // Parses the dictionary into static_node_infos_. A file that cannot be
-    // opened is silently ignored and leaves the vector untouched.
-    void LoadDefaultIdf(const string& filePath) {
-        ifstream ifs(filePath.c_str());
-        if (not ifs.is_open()) {
-            return;
-        }
-        string line;
-        vector<string> buf;
-        size_t lineno = 0;
-
-        for (; getline(ifs, line); lineno++) {
-            if (line.empty()) {
-                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
-                continue;
-            }
-            Split(line, buf, " ");
-            XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
-            IdfElement node_info;
-            node_info.word = buf[0];
-            node_info.idf = atof(buf[1].c_str());
-            static_node_infos_.push_back(node_info);
-        }
-    }
-
-    // Sum of the idf field over all entries.
-    double CalcIdfSum(const vector<IdfElement>& node_infos) const {
-        double sum = 0.0;
-
-        for (size_t i = 0; i < node_infos.size(); i++) {
-            sum += node_infos[i].idf;
-        }
-
-        return sum;
-    }
-public:
-    // Mean IDF of the dictionary. Only computed when the trie is built from
-    // the text dictionary; stays 0.0 when a cached dat file was attached.
-    double idfAverage_ = 0.0;
-private:
-    vector<IdfElement> static_node_infos_;
-    size_t total_dict_size_ = 0;
-    DatTrie dat_;
-};
-}
-
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@ -1,99 +0,0 @@
-#pragma once
-
-#include <memory>
-#include "QuerySegment.hpp"
-#include "KeywordExtractor.hpp"
-#include "segment-trie/segment-trie.h"
-
-namespace cppjieba {
-
-/**
- * Facade that owns one dictionary trie and one HMM model and exposes every
- * segmenter built on top of them, plus a keyword extractor.
- */
-class Jieba {
-public:
-    /**
-     * @param dict_path       main dictionary, passed to DictTrie
-     * @param model_path      HMM model file, passed to HMMModel
-     * @param user_dict_path  user dictionary, passed to DictTrie
-     * @param idfPath         IDF dictionary, passed to the keyword extractor
-     * @param stopWordPath    stop-word list, passed to the mix/query segmenters and the extractor
-     * @param dat_cache_path  cache location, passed to DictTrie and the extractor
-     */
-    Jieba(const string& dict_path,
-          const string& model_path,
-          const string& user_dict_path,
-          const string& idfPath = "",
-          const string& stopWordPath = "",
-          const string& dat_cache_path = "")
-        : dict_trie_(dict_path, user_dict_path, dat_cache_path),
-          model_(model_path),
-          mp_seg_(&dict_trie_),
-          hmm_seg_(&model_),
-          mix_seg_(&dict_trie_, &model_, stopWordPath),
-          full_seg_(&dict_trie_),
-          query_seg_(&dict_trie_, &model_, stopWordPath),
-          extractor(&dict_trie_, &model_, idfPath, dat_cache_path, stopWordPath){ }
-    ~Jieba() { }
-
-    // Cut: delegates to MixSegment; `hmm` is forwarded unchanged.
-    void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
-        mix_seg_.CutToStr(sentence, words, hmm);
-    }
-    void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
-        mix_seg_.CutToWord(sentence, words, hmm);
-    }
-    // CutAll: delegates to FullSegment.
-    void CutAll(const string& sentence, vector<string>& words) const {
-        full_seg_.CutToStr(sentence, words);
-    }
-    void CutAll(const string& sentence, vector<Word>& words) const {
-        full_seg_.CutToWord(sentence, words);
-    }
-    // CutForSearch: delegates to QuerySegment; `hmm` is forwarded unchanged.
-    void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
-        query_seg_.CutToStr(sentence, words, hmm);
-    }
-    void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
-        query_seg_.CutToWord(sentence, words, hmm);
-    }
-    // CutHMM: delegates to HMMSegment.
-    void CutHMM(const string& sentence, vector<string>& words) const {
-        hmm_seg_.CutToStr(sentence, words);
-    }
-    void CutHMM(const string& sentence, vector<Word>& words) const {
-        hmm_seg_.CutToWord(sentence, words);
-    }
-    // CutSmall: delegates to MPSegment with hmm fixed to false and the given
-    // max_word_len.
-    void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
-        mp_seg_.CutToStr(sentence, words, false, max_word_len);
-    }
-    void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
-        mp_seg_.CutToWord(sentence, words, false, max_word_len);
-    }
-
-    // Tag / LookupTag: delegate to MixSegment.
-    void Tag(const string& sentence, vector<pair<string, string> >& words) const {
-        mix_seg_.Tag(sentence, words);
-    }
-    string LookupTag(const string &str) const {
-        return mix_seg_.LookupTag(str);
-    }
-
-    // Applies the same separator set to every segmenter owned by this object.
-    void ResetSeparators(const string& s) {
-        //TODO
-        mp_seg_.ResetSeparators(s);
-        hmm_seg_.ResetSeparators(s);
-        mix_seg_.ResetSeparators(s);
-        full_seg_.ResetSeparators(s);
-        query_seg_.ResetSeparators(s);
-    }
-
-    // Non-owning access to the shared dictionary trie.
-    const DictTrie* GetDictTrie() const {
-        return &dict_trie_;
-    }
-
-    // Non-owning access to the shared HMM model.
-    const HMMModel* GetHMMModel() const {
-        return &model_;
-    }
-
-private:
-    // Declared first: the segmenters below are constructed with pointers to
-    // these two members.
-    DictTrie dict_trie_;
-    HMMModel model_;
-
-    // They share the same dict trie and model
-    MPSegment mp_seg_;
-    HMMSegment hmm_seg_;
-    MixSegment mix_seg_;
-    FullSegment full_seg_;
-    QuerySegment query_seg_;
-
-public:
-    // Keyword extractor built on the same dictionary trie and model.
-    KeywordExtractor extractor;
-}; // class Jieba
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@ -1,100 +0,0 @@
-#pragma once
-
-#include <cmath>
-#include "MixSegment.hpp"
-//#include "IdfTrie.hpp"
-#include "idf-trie/idf-trie.h"
-
-namespace cppjieba {
-
-using namespace limonp;
-using namespace std;
-
-/*utf8*/
-/**
- * Extracts the top-N keywords of a sentence: words are produced by a
- * MixSegment, counted, and each count is multiplied by the word's IDF.
- */
-class KeywordExtractor {
-public:
-
-    KeywordExtractor(const DictTrie* dictTrie,
-                     const HMMModel* model,
-                     const string& idfPath,
-                     const string& dat_cache_path,
-                     const string& stopWordPath)
-        : segment_(dictTrie, model, stopWordPath),
-          idf_trie_(idfPath, dat_cache_path) {}
-
-    ~KeywordExtractor() {}
-
-    // Appends only the keyword strings, best first, to `keywords`.
-    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-        vector<KeyWord> ranked;
-        Extract(sentence, ranked, topN);
-
-        for (const KeyWord& kw : ranked) {
-            keywords.push_back(kw.word);
-        }
-    }
-
-    // Appends (keyword, weight) pairs, best first, to `keywords`.
-    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-        vector<KeyWord> ranked;
-        Extract(sentence, ranked, topN);
-
-        for (const KeyWord& kw : ranked) {
-            keywords.push_back(pair<string, double>(kw.word, kw.weight));
-        }
-    }
-
-    // Core overload: replaces the content of `keywords` with at most topN
-    // entries ordered by descending weight.
-    void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
-
-        // word -> accumulated KeyWord; repeated words add up their weight
-        unordered_map<string, KeyWord> wordmap;
-        PreFilter pre_filter(symbols_, sentence);
-        RuneStrArray::const_iterator null_p;
-        WordRange range(null_p, null_p);
-        bool isNull(false);
-        while (pre_filter.Next(range, isNull)) {
-            if (!isNull) {
-                segment_.CutToStr(sentence, range,  wordmap);
-            }
-        }
-
-        keywords.clear();
-        keywords.reserve(wordmap.size());
-
-        for (auto& entry : wordmap) {
-            KeyWord& kw = entry.second;
-            const double idf = idf_trie_.Find(entry.first);
-            if (-1 == idf) {
-                // not in the IDF dictionary: use the average IDF instead
-                kw.weight *= idf_trie_.GetIdfAverage();
-            } else {
-                kw.weight *= idf;
-            }
-
-            kw.word = entry.first;
-            keywords.push_back(kw);
-        }
-
-        topN = min(topN, keywords.size());
-        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
-        keywords.resize(topN);
-    }
-private:
-
-    // Orders keywords by descending weight.
-    static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
-        return rhs.weight < lhs.weight;
-    }
-
-    MixSegment segment_;
-    IdfTrie idf_trie_;
-
-
-    unordered_set<Rune> symbols_;
-}; // class KeywordExtractor
-
-// Streams a KeyWord as a JSON-like object with its word, offsets and weight.
-inline ostream& operator << (ostream& os, const KeyWord& word) {
-    os << "{\"word\": \"" << word.word;
-    os << "\", \"offset\": " << word.offsets;
-    os << ", \"weight\": " << word.weight;
-    os << "}";
-    return os;
-}
-
-} // namespace cppjieba
-
-
-
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@ -1,133 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <set>
-#include <cassert>
-#include "limonp/Logging.hpp"
-#include "segment-trie/segment-trie.h"
-//#include "DictTrie.hpp"
-#include "SegmentTagged.hpp"
-#include "PosTagger.hpp"
-
-namespace cppjieba {
-
-/**
- * Dictionary-only segmenter: every cut is delegated to
- * DictTrie::FindWordRange. The CutWithSentence overloads are intentionally
- * no-ops for this segmenter.
- */
-class MPSegment: public SegmentTagged {
-public:
-    // `dictTrie` is not owned and must be non-null.
-    MPSegment(const DictTrie* dictTrie)
-        : dictTrie_(dictTrie) {
-        assert(dictTrie_);
-    }
-    ~MPSegment() { }
-
-    // Cuts [begin, end) into `words`; the hmm flag is ignored.
-    virtual void Cut(RuneStrArray::const_iterator begin,
-                     RuneStrArray::const_iterator end,
-                     vector<WordRange>& words,
-                     bool, size_t max_word_len) const override {
-        dictTrie_->FindWordRange(begin, end, words, max_word_len);
-    }
-
-    // No-op. Parameters are left unnamed instead of being assigned to
-    // std::ignore, so this header does not depend on <tuple>.
-    virtual void CutWithSentence(const string&, RuneStrArray::const_iterator, RuneStrArray::const_iterator, vector<string>&, bool,
-                     size_t) const override {
-    }
-
-    // No-op, see the overload above.
-    virtual void CutWithSentence(const string&, RuneStrArray::const_iterator, RuneStrArray::const_iterator, unordered_map<string, KeyWord>&, bool,
-                     size_t) const override {
-    }
-
-    const DictTrie* GetDictTrie() const override {
-        return dictTrie_;
-    }
-
-    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
-        return tagger_.Tag(src, res, *this);
-    }
-
-    bool IsUserDictSingleChineseWord(const Rune& value) const {
-        return dictTrie_->IsUserDictSingleChineseWord(value);
-    }
-private:
-    // The former CalcDP / CutByDag helpers were removed from this class; per
-    // the original note their functionality has been integrated into the
-    // trie's Find function.
-    const DictTrie* dictTrie_;
-    PosTagger tagger_;
-
-}; // class MPSegment
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@ -1,276 +0,0 @@
-#pragma once
-
-#include <cassert>
-#include "MPSegment.hpp"
-#include "HMMSegment.hpp"
-#include "limonp/StringUtil.hpp"
-#include "PosTagger.hpp"
-#define STOP_WORDS_USE_CEDAR_SEGMENT //使用cedar初步测试性能提升3%-5%左右，内存占用降低近不明显
-#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
-#include "cedar/cedar.h"
-#endif
-
-namespace cppjieba {
-/**
- * Segmenter that first cuts with the dictionary (MPSegment) and then re-cuts
- * runs of single runes that are not user-dictionary words with the HMM
- * segmenter. It also owns the stop-word dictionary used when building the
- * keyword map.
- *
- * In the WordRange values handled here, left == right denotes a one-rune word.
- */
-class MixSegment: public SegmentTagged {
-public:
-    MixSegment(const DictTrie* dictTrie,
-               const HMMModel* model,
-               const string& stopWordPath)
-        : mpSeg_(dictTrie), hmmSeg_(model) {
-        LoadStopWordDict(stopWordPath);
-    }
-    ~MixSegment() {}
-
-    // Cuts [begin, end) into word ranges. With hmm == false only the
-    // dictionary segmenter is used.
-    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
-                     size_t) const override {
-        if (!hmm) {
-            mpSeg_.CutRuneArray(begin, end, res);
-            return;
-        }
-
-        vector<WordRange> words;
-        assert(end >= begin);
-        words.reserve(end - begin);
-        mpSeg_.CutRuneArray(begin, end, words);
-
-        vector<WordRange> hmmRes;
-        hmmRes.reserve(end - begin);
-
-        for (size_t i = 0; i < words.size(); i++) {
-            //if mp Get a word, it's ok, put it into result
-            if (words[i].left != words[i].right
-                    || mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)) {
-                res.push_back(words[i]);
-                continue;
-            }
-
-            // if mp Get a single one and it is not in userdict, collect it in sequence
-            size_t j = i;
-
-            while (j < words.size() && words[j].left == words[j].right &&
-                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
-                j++;
-            }
-
-            // Cut the sequence with hmm
-            assert(j - 1 >= i);
-            // TODO
-            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
-
-            //put hmm result to result
-            res.insert(res.end(), hmmRes.begin(), hmmRes.end());
-
-            //clear tmp vars
-            hmmRes.clear();
-
-            //let i jump over this piece
-            i = j - 1;
-        }
-    }
-
-    // Cuts [begin, end) and appends the resulting words as strings taken from
-    // `s`. HMM is always on here; the `hmm` argument is ignored.
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
-                     size_t) const override {
-        (void)hmm; // HMM is enabled unconditionally for now
-        vector<WordRange> words;
-        assert(end >= begin);
-        words.reserve(end - begin);
-        mpSeg_.CutRuneArray(begin, end, words);
-
-        vector<WordRange> hmmRes;
-        hmmRes.reserve(end - begin);
-
-        for (size_t i = 0; i < words.size(); i++) {
-            // A dictionary word, a user-dictionary single rune, or the very
-            // last range is emitted as is.
-            if (words[i].left != words[i].right
-                    || mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
-                    || i == (words.size() - 1)) {
-                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
-                continue;
-            }
-
-            // words[i] is a single rune outside the user dictionary and not
-            // the last range: extend the run starting at the next range.
-            size_t j = i + 1;
-
-            while (j < (words.size() - 1) && words[j].left == words[j].right &&
-                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
-                j++;
-            }
-
-            // Cut the sequence with hmm
-            assert(j - 1 >= i);
-            // TODO
-            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
-
-            //put hmm result to result
-            for (size_t k = 0; k < hmmRes.size(); k++) {
-                res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
-            }
-
-            //clear tmp vars
-            hmmRes.clear();
-
-            //let i jump over this piece
-            i = j - 1;
-        }
-    }
-
-    // Cuts [begin, end) and accumulates every non-stop-word into `res`:
-    // each occurrence appends its byte offset and adds 1.0 to the weight.
-    // Dictionary segmentation only runs when the first rune is 3 or 4 bytes
-    // long; otherwise every rune is recorded on its own.
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
-                     size_t) const override {
-        (void)hmm;
-        assert(end >= begin);
-        // Nothing to cut; also keeps begin->len below from reading past the end.
-        if (begin == end) {
-            return;
-        }
-
-        vector<WordRange> words;
-        vector<WordRange> hmmRes;
-        if (3 == begin->len or 4 == begin->len) {
-            words.reserve(end - begin);
-            mpSeg_.CutRuneArray(begin, end, words);
-            hmmRes.reserve(words.size());
-        } else {
-            hmmRes.reserve(end - begin);
-        }
-
-        if (words.empty()) {
-            // No dictionary segmentation result: record rune by rune.
-            for (size_t i = 0; i < (size_t)(end - begin); i++) {
-                string str = s.substr((begin+i)->offset, (begin+i)->len);
-                KeyWord& entry = res[str];
-                entry.offsets.push_back((begin+i)->offset);
-                entry.weight += 1.0;
-            }
-            return;
-        }
-
-        for (size_t i = 0; i < words.size(); i++) {
-            // A dictionary word, a user-dictionary single rune, or the very
-            // last range is recorded directly (unless it is a stop word).
-            if (words[i].left != words[i].right
-                    || mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
-                    || i == (words.size() - 1)) {
-                const string str = GetStringFromRunes(s, words[i].left, words[i].right);
-                if (IsStopWord(str)) {
-                    continue;
-                }
-                KeyWord& entry = res[str];
-                entry.offsets.push_back(words[i].left->offset);
-                entry.weight += 1.0;
-                continue;
-            }
-
-            // words[i] is a single rune outside the user dictionary and not
-            // the last range: extend the run starting at the next range.
-            size_t j = i + 1;
-            bool isLastWordsSingle(false);
-            while (j <= (words.size() - 1)
-                   && words[j].left == words[j].right
-                   && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
-                if (j == (words.size() - 1)) {
-                    // the run reaches the last range, which is a single rune
-                    isLastWordsSingle = true;
-                    break;
-                }
-                j++;
-            }
-
-            // Cut the sequence with hmm
-            assert(j - 1 >= i);
-            // TODO
-            if (isLastWordsSingle) {
-                // include words[j] itself
-                hmmSeg_.CutRuneArray(words[i].left, words[j].left + 1, hmmRes);
-            } else {
-                // stop right before words[j]
-                hmmSeg_.CutRuneArray(words[i].left, words[j].left, hmmRes);
-            }
-
-            //put hmm result to result
-            for (size_t k = 0; k < hmmRes.size(); k++) {
-                const string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
-                if (IsStopWord(hmmStr)) {
-                    continue;
-                }
-                KeyWord& entry = res[hmmStr];
-                entry.offsets.push_back(hmmRes[k].left->offset);
-                entry.weight += 1.0;
-            }
-
-            //clear tmp vars
-            hmmRes.clear();
-
-            // The run consumed the last range: nothing left to process.
-            if (isLastWordsSingle) {
-                break;
-            }
-            //let i jump over this piece
-            i = j - 1;
-        }
-    }
-
-    const DictTrie* GetDictTrie() const override {
-        return mpSeg_.GetDictTrie();
-    }
-
-    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
-        return tagger_.Tag(src, res, *this);
-    }
-
-    string LookupTag(const string &str) const {
-        return tagger_.LookupTag(str, *this);
-    }
-
-    // Loads one stop word per line from `filePath`. A file that cannot be
-    // opened is silently ignored, leaving the dictionary unchanged.
-    void LoadStopWordDict(const string& filePath) {
-        ifstream ifs(filePath.c_str());
-        if (not ifs.is_open()) {
-            return;
-        }
-        string line;
-
-        while (getline(ifs, line)) {
-            // Skip blank lines: an empty string can never match a word, and
-            // NOTE(review): cedar appears to reject zero-length keys in
-            // update() - confirm against cedar.h.
-            if (line.empty()) {
-                continue;
-            }
-#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
-            stopWords_.update(line.c_str(), line.size(), 1);
-#else
-            stopWords_.insert(line);
-#endif
-        }
-
-        assert(stopWords_.size());
-    }
-private:
-    // True when `word` is present in the stop-word dictionary.
-    bool IsStopWord(const string& word) const {
-#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
-        return 0 < stopWords_.exactMatchSearch<int>(word.c_str(), word.size());
-#else
-        return stopWords_.find(word) != stopWords_.end();
-#endif
-    }
-
-#ifdef STOP_WORDS_USE_CEDAR_SEGMENT
-    cedar::da<int, -1, -2, false> stopWords_;
-#else
-    unordered_set<string> stopWords_;
-#endif
-    MPSegment mpSeg_;
-    HMMSegment hmmSeg_;
-    PosTagger tagger_;
-
-}; // class MixSegment
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/PinYinTrie.hpp
+++ b/libchinese-segmentation/cppjieba/PinYinTrie.hpp
@ -1,154 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <string>
-#include <cstring>
-#include <cstdlib>
-#include <stdint.h>
-#include <cmath>
-#include <limits>
-#include "limonp/StringUtil.hpp"
-#include "limonp/Logging.hpp"
-#include "Unicode.hpp"
-#include "DatTrie.hpp"
-#include <QDebug>
-namespace cppjieba {
-
-using namespace limonp;
-
-// Number of space-separated fields on a pinyin dictionary line that holds a
-// single pronunciation; lines with any other field count are treated as
-// multi-tone entries by PinYinTrie::LoadDefaultPinYin.
-const size_t PINYIN_COLUMN_NUM = 2;
-
-/**
- * Chinese-to-pinyin lookup.
- *
- * Words with a single pronunciation live in a DatTrie (attached from a cache
- * file or built from the dictionary); words with several pronunciations are
- * kept in an in-memory QMap that is always filled from the dictionary file.
- */
-class PinYinTrie {
-public:
-    enum UserWordWeightOption {
-        WordWeightMin,
-        WordWeightMedian,
-        WordWeightMax,
-    }; // enum UserWordWeightOption
-
-    /**
-     * @param dict_path             plain-text pinyin dictionary
-     * @param dat_cache_path        cache file; when empty a path under /tmp is used
-     * @param user_word_weight_opt  only used to derive the default cache file name
-     */
-    PinYinTrie(const string& dict_path, const string & dat_cache_path = "",
-             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
-        Init(dict_path, dat_cache_path, user_word_weight_opt);
-    }
-
-    ~PinYinTrie() {}
-
-    // Appends every pronunciation of a multi-tone `word` to `results`.
-    // Returns 0 on success, -1 when `word` is not a multi-tone entry.
-    int getMultiTonResults(string word, QStringList &results) {
-        const auto it = qmap_chinese2pinyin.constFind(QString::fromStdString(word));
-        if (it == qmap_chinese2pinyin.constEnd()) {
-            return -1;
-        }
-        for (const QString& pinyin : it.value()) {
-            results.push_back(pinyin);
-        }
-        return 0;
-    }
-
-    // Stores the pronunciation of a single-tone `word` in `result`.
-    // Returns 0 on success, -1 when the trie has no entry for `word`.
-    int getSingleTonResult(string word, QString &result) {
-        const PinYinMemElem * tmp = dat_.PinYinFind(word);
-        if (tmp) {
-            result = QString::fromStdString(tmp->GetTag());
-            return 0;
-        }
-        return -1;
-    }
-
-    // True when `word` is known either as a multi-tone entry or in the trie.
-    // (The trie test used to be negated, which reported unknown words as
-    // contained.)
-    bool contains(string &word) {
-        return qmap_chinese2pinyin.contains(QString::fromStdString(word))
-               or dat_.PinYinFind(word) != nullptr;
-    }
-
-    // True when `word` has more than one recorded pronunciation.
-    bool isMultiTone(const string &word) {
-        return qmap_chinese2pinyin.contains(QString::fromStdString(word));
-    }
-
-    // Size reported by CalcFileListMD5 for the dictionary file(s).
-    size_t GetTotalDictSize() const {
-        return total_dict_size_;
-    }
-
-private:
-    void Init(const string& dict_path, string dat_cache_path,
-              UserWordWeightOption user_word_weight_opt) {
-        size_t file_size_sum = 0;
-        vector<PinYinElement> node_infos;
-        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
-        total_dict_size_ = file_size_sum;
-
-        if (dat_cache_path.empty()) {
-            // No cache location given: fall back to a file under /tmp.
-            dat_cache_path = "/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
-        }
-        QString path = QString::fromStdString(dat_cache_path);
-        qDebug() << "#########PinYin path:" << path << file_size_sum;
-        if (dat_.InitPinYinAttachDat(dat_cache_path, md5)) {
-            // The cache only covers the trie; multi-tone entries still have
-            // to be read from the dictionary file.
-            LoadDefaultPinYin(node_infos, dict_path, true);
-            return;
-        }
-
-        LoadDefaultPinYin(node_infos, dict_path, false);
-        double min_weight = 0;
-        dat_.SetMinWeight(min_weight);
-
-        const auto build_ret = dat_.InitBuildDat(node_infos, dat_cache_path, md5);
-        assert(build_ret);
-        (void)build_ret; // only inspected by the assert; silences NDEBUG builds
-    }
-
-    // Reads the dictionary. Two-field lines are appended to `node_infos`
-    // (skipped when `multiFlag` is true); every other line is stored as a
-    // multi-tone entry keyed by its last field. A file that cannot be opened
-    // is silently ignored.
-    void LoadDefaultPinYin(vector<PinYinElement> &node_infos, const string& filePath, bool multiFlag) {
-        ifstream ifs(filePath.c_str());
-        if (not ifs.is_open()) {
-            return;
-        }
-        string line;
-        vector<string> buf;
-        size_t lineno = 0;
-
-        for (; getline(ifs, line); lineno++) {
-            if (line.empty()) {
-                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
-                continue;
-            }
-            Split(line, buf, " ");
-            if (buf.size() == PINYIN_COLUMN_NUM) {
-                // single pronunciation
-                if (multiFlag) {
-                    continue;
-                }
-                PinYinElement node_info;
-                node_info.word = buf[1];
-                node_info.tag = buf[0];
-                node_infos.push_back(node_info);
-            } else {
-                // several pronunciations: the last field is the word, the
-                // preceding fields are its pinyin readings
-                QStringList fields = QString::fromUtf8(line.c_str()).split(" ");
-                const QString key = fields.last().trimmed();
-                fields.pop_back();
-                qmap_chinese2pinyin[key] = fields;
-            }
-        }
-    }
-
-private:
-    QMap<QString, QStringList> qmap_chinese2pinyin;
-    size_t total_dict_size_ = 0;
-    DatTrie dat_;
-};
-}
-
--- a/libchinese-segmentation/cppjieba/PosTagger.hpp
+++ b/libchinese-segmentation/cppjieba/PosTagger.hpp
@ -1,84 +0,0 @@
-#pragma once
-
-#include "limonp/StringUtil.hpp"
-#include "segment-trie/segment-trie.h"
-//#include "DictTrie.hpp"
-//#include "SegmentTagged.hpp"
-
-namespace cppjieba {
-using namespace limonp;
-
-// Fallback part-of-speech tags returned by PosTagger::SpecialRule when the
-// dictionary has no tag for a word:
-//   POS_M   - every ASCII rune that was counted is a digit
-//   POS_ENG - at least one counted ASCII rune is not a digit
-//   POS_X   - no ASCII rune was counted (also used when decoding fails)
-static const char* const POS_M = "m";
-static const char* const POS_ENG = "eng";
-static const char* const POS_X = "x";
-
-/**
- * Assigns part-of-speech tags: the dictionary tag when the word has one,
- * otherwise a fallback derived from the ASCII content of the word.
- */
-class PosTagger {
-public:
-    PosTagger() {}
-    ~PosTagger() {}
-
-    // Cuts `src` with `segment` and appends one (word, tag) pair per word.
-    // Returns whether `res` is non-empty afterwards.
-    bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
-        vector<string> pieces;
-        segment.CutToStr(src, pieces);
-
-        for (const string& piece : pieces) {
-            res.push_back(make_pair(piece, LookupTag(piece, segment)));
-        }
-
-        return !res.empty();
-    }
-
-    // Returns the dictionary tag of `str`, or the SpecialRule fallback when
-    // the word is unknown or its tag is empty.
-    string LookupTag(const string &str, const SegmentTagged& segment) const {
-        const DictTrie * dict = segment.GetDictTrie();
-        assert(dict != nullptr);
-        const auto entry = dict->Find(str);
-
-        if (entry != nullptr && !entry->GetTag().empty()) {
-            return entry->GetTag();
-        }
-
-        RuneStrArray runes;
-        if (!DecodeRunesInString(str, runes)) {
-            XLOG(ERROR) << "Decode failed.";
-            return POS_X;
-        }
-
-        return SpecialRule(runes);
-    }
-
-private:
-    // Counts ASCII runes (and digits among them), stopping once the ASCII
-    // count reaches half of the rune count, then picks the fallback tag.
-    const char* SpecialRule(const RuneStrArray& unicode) const {
-        const size_t total = unicode.size();
-        const size_t asciiLimit = total / 2;
-        size_t digitCount = 0;
-        size_t asciiCount = 0;
-
-        for (size_t pos = 0; pos < total && asciiCount < asciiLimit; pos++) {
-            if (!(unicode[pos].rune < 0x80)) {
-                continue;
-            }
-            asciiCount++;
-
-            if ('0' <= unicode[pos].rune && unicode[pos].rune <= '9') {
-                digitCount++;
-            }
-        }
-
-        // ascii char is not found
-        if (asciiCount == 0) {
-            return POS_X;
-        }
-
-        // all digits -> number tag; otherwise the ascii chars contain letters
-        return (digitCount == asciiCount) ? POS_M : POS_ENG;
-    }
-
-}; // class PosTagger
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@ -1,127 +0,0 @@
-#pragma once
-
-#include "limonp/Logging.hpp"
-#include <unordered_set>
-#include "Unicode.hpp"
-
-namespace cppjieba {
-
-class PreFilter {
-public:
-    PreFilter(const std::unordered_set<Rune>& symbols,
-              const string& sentence)
-        : symbols_(symbols) {
-        if (!DecodeRunesInString(sentence, sentence_)) {
-            XLOG(ERROR) << "decode failed. "<<sentence;
-        }
-
-        cursor_ = sentence_.begin();
-    }
-    ~PreFilter() {
-    }
-    bool HasNext() const {
-        return cursor_ != sentence_.end();
-    }
-    bool Next(WordRange& wordRange) {
-
-        if (cursor_ == sentence_.end()) {
-            return false;
-        }
-
-        wordRange.left = cursor_;
-
-        while (cursor_->rune == 0x20 && cursor_ != sentence_.end()) {
-            cursor_++;
-        }
-
-        if (cursor_ == sentence_.end()) {
-            wordRange.right = cursor_;
-            return true;
-        }
-
-        while (++cursor_ != sentence_.end()) {
-            if (cursor_->rune == 0x20) {
-                wordRange.right = cursor_;
-                return true;
-            }
-        }
-
-        wordRange.right = sentence_.end();
-        return true;
-    }
-
-    bool Next(WordRange& wordRange, bool& isNull) {
-        isNull = false;
-        if (cursor_ == sentence_.end()) {
-            return false;
-        }
-
-        wordRange.left = cursor_;
-        if (cursor_->rune == 0x20) {
-            while (cursor_ != sentence_.end()) {
-                if (cursor_->rune != 0x20) {
-                    if (wordRange.left == cursor_) {
-                        cursor_ ++;
-                    }
-                    wordRange.right = cursor_;
-                    isNull = true;
-                    return true;
-                }
-                cursor_ ++;
-            }
-            return false;
-        }
-
-        int max_num = 0;
-        uint32_t utf8_num = cursor_->len;
-
-        while (cursor_ != sentence_.end()) {
-            if (cursor_->rune == 0x20) {
-                if (wordRange.left == cursor_) {
-                    cursor_ ++;
-                }
-
-                wordRange.right = cursor_;
-                return true;
-            }
-
-            cursor_ ++;
-            max_num++;
-            if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节，暂定限制为1024个字
-                wordRange.right = cursor_;
-                return true;
-            }
-        }
-
-        wordRange.right = sentence_.end();
-        return true;
-    }
-
-    WordRange Next() {
-        WordRange range(cursor_, cursor_);
-
-        while (cursor_ != sentence_.end()) {
-            //if (IsIn(symbols_, cursor_->rune)) {
-            if (cursor_->rune == 0x20) {
-                if (range.left == cursor_) {
-                    cursor_ ++;
-                }
-
-                range.right = cursor_;
-                return range;
-            }
-
-            cursor_ ++;
-        }
-
-        range.right = sentence_.end();
-        return range;
-    }
-private:
-    RuneStrArray::const_iterator cursor_;
-    RuneStrArray sentence_;
-    const std::unordered_set<Rune>& symbols_;
-}; // class PreFilter
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@ -1,89 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <set>
-#include <cassert>
-#include "limonp/Logging.hpp"
-#include "SegmentBase.hpp"
-#include "FullSegment.hpp"
-#include "MixSegment.hpp"
-#include "Unicode.hpp"
-
-namespace cppjieba {
-class QuerySegment: public SegmentBase {
-public:
-    QuerySegment(const DictTrie* dictTrie,
-                 const HMMModel* model,
-                 const string& stopWordPath)
-        : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
-    }
-    ~QuerySegment() {
-    }
-
-    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
-                     size_t) const override {
-        //use mix Cut first
-        vector<WordRange> mixRes;
-        mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
-
-        vector<WordRange> fullRes;
-
-        for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
-            if (mixResItr->Length() > 2) {
-                for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
-                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
-
-                    if (trie_->Find(text) != nullptr) {
-                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
-                        res.push_back(wr);
-                    }
-                }
-            }
-
-            if (mixResItr->Length() > 3) {
-                for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
-                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
-
-                    if (trie_->Find(text) != nullptr) {
-                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
-                        res.push_back(wr);
-                    }
-                }
-            }
-
-            res.push_back(*mixResItr);
-        }
-    }
-
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
-                     size_t) const override {
-        std::ignore = s;
-        std::ignore = begin;
-        std::ignore = end;
-        std::ignore = res;
-        std::ignore = hmm;
-    }
-private:
-    bool IsAllAscii(const RuneArray& s) const {
-        for (size_t i = 0; i < s.size(); i++) {
-            if (s[i] >= 0x80) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-    MixSegment mixSeg_;
-    const DictTrie* trie_;
-}; // QuerySegment
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/SegmentBase.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp
@ -1,94 +0,0 @@
-#pragma once
-
-#include "limonp/Logging.hpp"
-#include "PreFilter.hpp"
-#include <cassert>
-
-
-namespace cppjieba {
-
-const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
-
-using namespace limonp;
-
-class SegmentBase {
-public:
-    SegmentBase() {
-        XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
-    }
-    virtual ~SegmentBase() { }
-
-    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
-                     size_t max_word_len) const = 0;
-    //添加基于sentence的cut方法，减少中间变量的存储与格式转换--jxx20210517
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
-                     size_t max_word_len) const = 0;
-    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
-                     size_t max_word_len) const = 0;
-    //重写CutToStr函数，简化获取vector<string>& words的流程，降低内存占用--jxx20210517
-    void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
-                  size_t max_word_len = MAX_WORD_LENGTH) const {
-        PreFilter pre_filter(symbols_, sentence);
-        words.clear();
-        words.reserve(sentence.size() / 2);//todo 参考源码，参数待定
-        RuneStrArray::const_iterator null_p;
-        WordRange range(null_p, null_p);
-        while (pre_filter.Next(range)) {
-            CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
-        }
-    }
-    void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
-                  size_t max_word_len = MAX_WORD_LENGTH) const {
-        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
-    }
-    void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
-                  size_t max_word_len = MAX_WORD_LENGTH) const {
-        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
-    }
-    void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
-                   size_t max_word_len = MAX_WORD_LENGTH) const {
-        PreFilter pre_filter(symbols_, sentence);
-        vector<WordRange> wrs;
-        wrs.reserve(sentence.size() / 2);
-
-        while (pre_filter.HasNext()) {
-            auto range = pre_filter.Next();
-            Cut(range.left, range.right, wrs, hmm, max_word_len);
-        }
-
-        words.clear();
-        words.reserve(wrs.size());
-        GetWordsFromWordRanges(sentence, wrs, words);
-        wrs.clear();
-        vector<WordRange>().swap(wrs);
-    }
-
-    void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
-                      bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
-        Cut(begin, end, res, hmm, max_word_len);
-    }
-
-    bool ResetSeparators(const string& s) {
-        symbols_.clear();
-        RuneStrArray runes;
-
-        if (!DecodeRunesInString(s, runes)) {
-            XLOG(ERROR) << "decode " << s << " failed";
-            return false;
-        }
-
-        for (size_t i = 0; i < runes.size(); i++) {
-            if (!symbols_.insert(runes[i].rune).second) {
-                XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
-                return false;
-            }
-        }
-
-        return true;
-    }
-protected:
-    unordered_set<Rune> symbols_;
-}; // class SegmentBase
-
-} // cppjieba
-
--- a/libchinese-segmentation/cppjieba/SegmentTagged.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentTagged.hpp
@ -1,21 +0,0 @@
-#pragma once
-
-#include "SegmentBase.hpp"
-
-namespace cppjieba {
-
-class SegmentTagged : public SegmentBase {
-public:
-    SegmentTagged() {
-    }
-    virtual ~SegmentTagged() {
-    }
-
-    virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
-
-    virtual const DictTrie* GetDictTrie() const = 0;
-
-}; // class SegmentTagged
-
-} // cppjieba
-
--- a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
@ -1,205 +0,0 @@
-
-#include <cmath>
-#include "Jieba.hpp"
-
-namespace cppjieba {
-using namespace limonp;
-using namespace std;
-
-class TextRankExtractor {
-public:
-    typedef struct _Word {
-        string word;
-        vector<size_t> offsets;
-        double weight;
-    }    Word; // struct Word
-private:
-    typedef std::map<string, Word> WordMap;
-
-    class WordGraph {
-    private:
-        typedef double Score;
-        typedef string Node;
-        typedef std::set<Node> NodeSet;
-
-        typedef std::map<Node, double> Edges;
-        typedef std::map<Node, Edges> Graph;
-        //typedef std::unordered_map<Node,double> Edges;
-        //typedef std::unordered_map<Node,Edges> Graph;
-
-        double d;
-        Graph graph;
-        NodeSet nodeSet;
-    public:
-        WordGraph(): d(0.85) {};
-        WordGraph(double in_d): d(in_d) {};
-
-        void addEdge(Node start, Node end, double weight) {
-            Edges temp;
-            Edges::iterator gotEdges;
-            nodeSet.insert(start);
-            nodeSet.insert(end);
-            graph[start][end] += weight;
-            graph[end][start] += weight;
-        }
-
-        void rank(WordMap &ws, size_t rankTime = 10) {
-            WordMap outSum;
-            Score wsdef, min_rank, max_rank;
-
-            if (graph.size() == 0) {
-                return;
-            }
-
-            wsdef = 1.0 / graph.size();
-
-            for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
-                // edges->first start节点；edge->first end节点；edge->second 权重
-                ws[edges->first].word = edges->first;
-                ws[edges->first].weight = wsdef;
-                outSum[edges->first].weight = 0;
-
-                for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
-                    outSum[edges->first].weight += edge->second;
-                }
-            }
-
-            //sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
-            for (size_t i = 0; i < rankTime; i++) {
-                for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
-                    double s = 0;
-
-                    for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
-                        // edge->first end节点；edge->second 权重
-                    {
-                        s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
-                    }
-
-                    ws[*node].weight = (1 - d) + d * s;
-                }
-            }
-
-            min_rank = max_rank = ws.begin()->second.weight;
-
-            for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
-                if (i->second.weight < min_rank) {
-                    min_rank = i->second.weight;
-                }
-
-                if (i->second.weight > max_rank) {
-                    max_rank = i->second.weight;
-                }
-            }
-
-            for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
-                ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
-            }
-        }
-    };
-
-public:
-    TextRankExtractor(const DictTrie* dictTrie,
-                      const HMMModel* model,
-                      const string& stopWordPath)
-        : segment_(dictTrie, model) {
-        LoadStopWordDict(stopWordPath);
-    }
-    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
-        LoadStopWordDict(stopWordPath);
-    }
-    ~TextRankExtractor() {
-    }
-
-    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-        vector<Word> topWords;
-        Extract(sentence, topWords, topN);
-
-        for (size_t i = 0; i < topWords.size(); i++) {
-            keywords.push_back(topWords[i].word);
-        }
-    }
-
-    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-        vector<Word> topWords;
-        Extract(sentence, topWords, topN);
-
-        for (size_t i = 0; i < topWords.size(); i++) {
-            keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
-        }
-    }
-
-    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
-        vector<string> words;
-        segment_.CutToStr(sentence, words);
-
-        TextRankExtractor::WordGraph graph;
-        WordMap wordmap;
-        size_t offset = 0;
-
-        for (size_t i = 0; i < words.size(); i++) {
-            size_t t = offset;
-            offset += words[i].size();
-
-            if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
-                continue;
-            }
-
-            for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
-                if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
-                    skip++;
-                    continue;
-                }
-
-                graph.addEdge(words[i], words[j], 1);
-            }
-
-            wordmap[words[i]].offsets.push_back(t);
-        }
-
-        if (offset != sentence.size()) {
-            XLOG(ERROR) << "words illegal";
-            return;
-        }
-
-        graph.rank(wordmap, rankTime);
-
-        keywords.clear();
-        keywords.reserve(wordmap.size());
-
-        for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
-            keywords.push_back(itr->second);
-        }
-
-        topN = min(topN, keywords.size());
-        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
-        keywords.resize(topN);
-    }
-private:
-    void LoadStopWordDict(const string& filePath) {
-        ifstream ifs(filePath.c_str());
-        XCHECK(ifs.is_open()) << "open " << filePath << " failed";
-        string line ;
-
-        while (getline(ifs, line)) {
-            stopWords_.insert(line);
-        }
-
-        assert(stopWords_.size());
-    }
-
-    static bool Compare(const Word &x, const Word &y) {
-        return x.weight > y.weight;
-    }
-
-    MixSegment segment_;
-    unordered_set<string> stopWords_;
-}; // class TextRankExtractor
-
-inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
-    return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
-           "}";
-}
-} // namespace cppjieba
-
-
-
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@ -1,264 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string>
-#include <vector>
-#include <ostream>
-#include "limonp/LocalVector.hpp"
-#include "limonp/StringUtil.hpp"
-#include "common-struct.h"
-
-namespace cppjieba {
-
-using std::string;
-using std::vector;
-
-typedef uint32_t Rune;
-
-inline std::ostream& operator << (std::ostream& os, const Word& w) {
-    return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
-}
-
-struct DatMemElem {
-    double weight = 0.0;
-    char tag[8] = {};
-
-    void SetTag(const string & str) {
-        memset(&tag[0], 0, sizeof(tag));
-        strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
-    }
-
-    string GetTag() const {
-        return &tag[0];
-    }
-};
-
-struct DatDag {
-    limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
-    //double max_weight;
-    //size_t max_next;
-};
-
-struct RuneInfo {
-    Rune rune;
-    uint32_t offset;
-    uint32_t len;
-    uint32_t unicode_offset = 0;
-    uint32_t unicode_length = 0;
-    RuneInfo(): rune(0), offset(0), len(0) {
-    }
-    RuneInfo(Rune r, uint32_t o, uint32_t l)
-        : rune(r), offset(o), len(l) {
-    }
-    RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
-        : rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
-    }
-}; // struct RuneInfo
-
-inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
-    return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
-}
-
-typedef limonp::LocalVector<Rune> RuneArray;
-typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
-
-// [left, right]
-struct WordRange {
-    RuneStrArray::const_iterator left;
-    RuneStrArray::const_iterator right;
-    WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
-        : left(l), right(r) {
-    }
-    size_t Length() const {
-        return right - left;
-    }
-
-    bool IsAllAscii() const {
-        for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
-            if (iter->rune >= 0x80) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-}; // struct WordRange
-
-
-inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
-    arr.clear();
-    return limonp::Utf8ToUnicode32(s, arr);
-}
-
-inline RuneArray DecodeRunesInString(const string& s) {
-    RuneArray result;
-    DecodeRunesInString(s, result);
-    return result;
-}
-
-inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
-
-    uint32_t tmp;
-    uint32_t offset = 0;
-    runes.clear();
-    uint32_t len(0);
-    for (size_t i = 0; i < s.size();) {
-      if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
-        // 7bit, total 7bit
-        tmp = (uint8_t)(s.data()[i]) & 0x7f;
-        i++;
-        len = 1;
-      } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
-        // 5bit, total 5bit
-        tmp = (uint8_t)(s.data()[i]) & 0x1f;
-
-        // 6bit, total 11bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
-        i += 2;
-        len = 2;
-      } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
-        // 4bit, total 4bit
-        tmp = (uint8_t)(s.data()[i]) & 0x0f;
-
-        // 6bit, total 10bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
-
-        // 6bit, total 16bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
-
-        i += 3;
-        len = 3;
-      } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
-        // 3bit, total 3bit
-        tmp = (uint8_t)(s.data()[i]) & 0x07;
-
-        // 6bit, total 9bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
-
-        // 6bit, total 15bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
-
-        // 6bit, total 21bit
-        tmp <<= 6;
-        tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
-
-        i += 4;
-        len = 4;
-      } else {
-        return false;
-      }
-      RuneInfo x(tmp, offset, len, i, 1);
-      runes.push_back(x);
-      offset += len;
-    }
-    return true;
-}
-
-class RunePtrWrapper {
-public:
-    const RuneInfo * m_ptr = nullptr;
-
-public:
-    explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
-
-    uint32_t operator *() {
-        return m_ptr->rune;
-    }
-
-    RunePtrWrapper operator ++(int) {
-        m_ptr ++;
-        return RunePtrWrapper(m_ptr);
-    }
-
-    bool operator !=(const RunePtrWrapper & b) const {
-        return this->m_ptr != b.m_ptr;
-    }
-};
-
-inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
-    string str;
-    RunePtrWrapper it_begin(begin), it_end(end);
-    limonp::Unicode32ToUtf8(it_begin, it_end, str);
-    return str;
-}
-
-inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
-    RunePtrWrapper it_begin(begin), it_end(end);
-    limonp::Unicode32ToUtf8(it_begin, it_end, str);
-    return;
-}
-
-class Unicode32Counter {
-public :
-    size_t length = 0;
-    void clear() {
-        length = 0;
-    }
-    void push_back(uint32_t) {
-        ++length;
-    }
-};
-
-inline size_t Utf8CharNum(const char * str, size_t length) {
-    Unicode32Counter c;
-
-    if (limonp::Utf8ToUnicode32(str, length, c)) {
-        return c.length;
-    }
-
-    return 0;
-}
-
-inline size_t Utf8CharNum(const string & str) {
-    return Utf8CharNum(str.data(), str.size());
-}
-
-inline bool IsSingleWord(const string& str) {
-    return Utf8CharNum(str) == 1;
-}
-
-
-// [left, right]
-inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
-    assert(right->offset >= left->offset);
-    uint32_t len = right->offset - left->offset + right->len;
-    uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
-    return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length);
-}
-
-inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
-    assert(right->offset >= left->offset);
-    //uint32_t len = right->offset - left->offset + right->len;
-    return s.substr(left->offset, right->offset - left->offset + right->len);
-}
-
-inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
-    for (size_t i = 0; i < wrs.size(); i++) {
-        words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
-    }
-}
-
-inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
-    for (size_t i = 0; i < wrs.size(); i++) {
-        words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
-    }
-}
-
-inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
-    strs.resize(words.size());
-
-    for (size_t i = 0; i < words.size(); ++i) {
-        strs[i] = words[i].word;
-    }
-}
-
-const size_t MAX_WORD_LENGTH = 512;
-
-} // namespace cppjieba
-
--- a/libchinese-segmentation/cppjieba/cppjieba.pri
+++ b/libchinese-segmentation/cppjieba/cppjieba.pri
@ -1,43 +0,0 @@
-INCLUDEPATH += $$PWD
-
-HEADERS += \
-    $$PWD/DictTrie.hpp \
-    $$PWD/IdfTrie.hpp \
-    $$PWD/PinYinTrie.hpp \
-    $$PWD/FullSegment.hpp \
-    $$PWD/HMMModel.hpp \
-    $$PWD/HMMSegment.hpp \
-    $$PWD/Jieba.hpp \
-    $$PWD/KeywordExtractor.hpp \
-    $$PWD/MPSegment.hpp \
-    $$PWD/MixSegment.hpp \
-    $$PWD/PosTagger.hpp \
-    $$PWD/PreFilter.hpp \
-    $$PWD/QuerySegment.hpp \
-    $$PWD/SegmentBase.hpp \
-    $$PWD/SegmentTagged.hpp \
-    $$PWD/TextRankExtractor.hpp \
-#    $$PWD/Trie.hpp \
-    $$PWD/Unicode.hpp \
-    $$PWD/DatTrie.hpp \
-    $$PWD/idf-trie/idf-trie.h \
-    $$PWD/segment-trie/segment-trie.h
-
-DISTFILES += \
-    dict/README.md \
-    dict/hmm_model.utf8 \
-    dict/idf.utf8 \
-    dict/jieba.dict.utf8 \
-    dict/pos_dict/char_state_tab.utf8 \
-    dict/pos_dict/prob_emit.utf8 \
-    dict/pos_dict/prob_start.utf8 \
-    dict/pos_dict/prob_trans.utf8 \
-    dict/stop_words.utf8 \
-    dict/user.dict.utf8
-    #dict/pinyinWithoutTone.txt \
-
-include(limonp/limonp.pri)
-
-SOURCES += \
-    $$PWD/idf-trie/idf-trie.cpp \
-    $$PWD/segment-trie/segment-trie.cpp
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.cpp
@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#include "idf-trie.h"
-
-IdfTrie::IdfTrie(const vector<string> file_paths, string dat_cache_path)
-    : StorageBase<double, false, IdfCacheFileHeader>(file_paths, dat_cache_path)
-{
-    this->Init();
-}
-
-IdfTrie::IdfTrie(string file_path, string dat_cache_path)
-: StorageBase<double, false, IdfCacheFileHeader>(vector<string>{file_path}, dat_cache_path)
-{
-    this->Init();
-}
-
-void IdfTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
-{
-    IdfCacheFileHeader header;
-    assert(sizeof(header.md5_hex) == md5.size());
-    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
-    double idf_sum(0), idf_average(0), tmp(0);
-    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-    umask(S_IWGRP | S_IWOTH);
-    const int fd =mkstemp((char *)tmp_filepath.data());
-    assert(fd >= 0);
-    fchmod(fd, 0644);
-
-    write_bytes = write(fd, (const char *)&header, sizeof(IdfCacheFileHeader));
-
-    ifstream ifs(IDF_DICT_PATH);
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, " ");
-        if (buf.size() != 2)
-            continue;
-        this->Update(buf[0].c_str(), buf[0].size(), elements_num);
-        offset += sizeof(double);
-        elements_num++;
-        tmp = atof(buf[1].c_str());
-        write_bytes += write(fd, &tmp, sizeof(double));
-        idf_sum += tmp;
-    }
-    idf_average = idf_sum / elements_num;
-    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
-
-    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
-    write(fd, &elements_num, sizeof(int));
-    write(fd, &offset, sizeof(int));
-    data_trie_size = this->GetDataTrieSize();
-    write(fd, &data_trie_size, sizeof(int));
-    write(fd, &idf_average, sizeof(double));
-
-    close(fd);
-    assert((size_t)write_bytes == sizeof(IdfCacheFileHeader) + offset + this->GetDataTrieTotalSize());
-
-    tryRename(tmp_filepath, dat_cache_file);
-}
-
-double IdfTrie::Find(const string &key) const
-{
-    int result = this->ExactMatchSearch(key.c_str(), key.size());
-    if (result < 0)
-        return -1;
-    return this->GetElementPtr()[result];
-}
-
-double IdfTrie::GetIdfAverage() const
-{
-    return this->GetCacheFileHeaderPtr()->idf_average;
-}
-
--- a/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
+++ b/libchinese-segmentation/cppjieba/idf-trie/idf-trie.h
@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#ifndef IdfTrie_H
-#define IdfTrie_H
-
-#include "storage-base.hpp"
-
-const char * const  IDF_DICT_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
-
-struct IdfCacheFileHeader : CacheFileHeaderBase
-{
-    double idf_average = 0;
-};
-
-class IdfTrie : public StorageBase<double, false, IdfCacheFileHeader>
-{
-public:
-    IdfTrie(const vector<string> file_paths, string dat_cache_path);
-    IdfTrie(string file_path, string dat_cache_path);
-    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
-    double Find(const string &key) const;
-    double GetIdfAverage() const;
-
-private:
-
-};
-
-#endif // IdfTrie_H
--- a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
@ -1,70 +0,0 @@
-/************************************
- * file enc : ascii
- * author   : wuyanyi09@gmail.com
- ************************************/
-
-#ifndef LIMONP_ARGV_FUNCTS_H
-#define LIMONP_ARGV_FUNCTS_H
-
-#include <set>
-#include <sstream>
-#include "StringUtil.hpp"
-
-namespace limonp {
-
-using namespace std;
-
-class ArgvContext {
- public :
-  ArgvContext(int argc, const char* const * argv) {
-    for(int i = 0; i < argc; i++) {
-      if(StartsWith(argv[i], "-")) {
-        if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
-          mpss_[argv[i]] = argv[i+1];
-          i++;
-        } else {
-          sset_.insert(argv[i]);
-        }
-      } else {
-        args_.push_back(argv[i]);
-      }
-    }
-  }
-  ~ArgvContext() {
-  }
-
-  friend ostream& operator << (ostream& os, const ArgvContext& args);
-  string operator [](size_t i) const {
-    if(i < args_.size()) {
-      return args_[i];
-    }
-    return "";
-  }
-  string operator [](const string& key) const {
-    map<string, string>::const_iterator it = mpss_.find(key);
-    if(it != mpss_.end()) {
-      return it->second;
-    }
-    return "";
-  }
-
-  bool HasKey(const string& key) const {
-    if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
-      return true;
-    }
-    return false;
-  }
-
- private:
-  vector<string> args_;
-  map<string, string> mpss_;
-  set<string> sset_;
-}; // class ArgvContext
-
-inline ostream& operator << (ostream& os, const ArgvContext& args) {
-  return os<<args.args_<<args.mpss_<<args.sset_;
-}
-
-} // namespace limonp
-
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
@ -1,49 +0,0 @@
-#ifndef LIMONP_BLOCKINGQUEUE_HPP
-#define LIMONP_BLOCKINGQUEUE_HPP
-
-#include <queue>
-#include "Condition.hpp"
-
-namespace limonp {
-template<class T>
-class BlockingQueue: NonCopyable {
- public:
-  BlockingQueue()
-    : mutex_(), notEmpty_(mutex_), queue_() {
-  }
-
-  void Push(const T& x) {
-    MutexLockGuard lock(mutex_);
-    queue_.push(x);
-    notEmpty_.Notify(); // Wait morphing saves us
-  }
-
-  T Pop() {
-    MutexLockGuard lock(mutex_);
-    // always use a while-loop, due to spurious wakeup
-    while (queue_.empty()) {
-      notEmpty_.Wait();
-    }
-    assert(!queue_.empty());
-    T front(queue_.front());
-    queue_.pop();
-    return front;
-  }
-
-  size_t Size() const {
-    MutexLockGuard lock(mutex_);
-    return queue_.size();
-  }
-  bool Empty() const {
-    return Size() == 0;
-  }
-
- private:
-  mutable MutexLock mutex_;
-  Condition         notEmpty_;
-  std::queue<T>     queue_;
-}; // class BlockingQueue
-
-} // namespace limonp
-
-#endif // LIMONP_BLOCKINGQUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
@ -1,67 +0,0 @@
-#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
-#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
-
-#include "BoundedQueue.hpp"
-
-namespace limonp {
-
-template<typename T>
-class BoundedBlockingQueue : NonCopyable {
- public:
-  explicit BoundedBlockingQueue(size_t maxSize)
-    : mutex_(),
-      notEmpty_(mutex_),
-      notFull_(mutex_),
-      queue_(maxSize) {
-  }
-
-  void Push(const T& x) {
-    MutexLockGuard lock(mutex_);
-    while (queue_.Full()) {
-      notFull_.Wait();
-    }
-    assert(!queue_.Full());
-    queue_.Push(x);
-    notEmpty_.Notify();
-  }
-
-  T Pop() {
-    MutexLockGuard lock(mutex_);
-    while (queue_.Empty()) {
-      notEmpty_.Wait();
-    }
-    assert(!queue_.Empty());
-    T res = queue_.Pop();
-    notFull_.Notify();
-    return res;
-  }
-
-  bool Empty() const {
-    MutexLockGuard lock(mutex_);
-    return queue_.Empty();
-  }
-
-  bool Full() const {
-    MutexLockGuard lock(mutex_);
-    return queue_.Full();
-  }
-
-  size_t size() const {
-    MutexLockGuard lock(mutex_);
-    return queue_.size();
-  }
-
-  size_t capacity() const {
-    return queue_.capacity();
-  }
-
- private:
-  mutable MutexLock          mutex_;
-  Condition                  notEmpty_;
-  Condition                  notFull_;
-  BoundedQueue<T>  queue_;
-}; // class BoundedBlockingQueue
-
-} // namespace limonp
-
-#endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
@ -1,65 +0,0 @@
-#ifndef LIMONP_BOUNDED_QUEUE_HPP
-#define LIMONP_BOUNDED_QUEUE_HPP
-
-#include <vector>
-#include <fstream>
-#include <cassert>
-
-namespace limonp {
-using namespace std;
-template<class T>
-class BoundedQueue {
- public:
-  explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
-    head_ = 0;
-    tail_ = 0;
-    size_ = 0;
-    assert(capacity_);
-  }
-  ~BoundedQueue() {
-  }
-
-  void Clear() {
-    head_ = 0;
-    tail_ = 0;
-    size_ = 0;
-  }
-  bool Empty() const {
-    return !size_;
-  }
-  bool Full() const {
-    return capacity_ == size_;
-  }
-  size_t Size() const {
-    return size_;
-  }
-  size_t Capacity() const {
-    return capacity_;
-  }
-
-  void Push(const T& t) {
-    assert(!Full());
-    circular_buffer_[tail_] = t;
-    tail_ = (tail_ + 1) % capacity_;
-    size_ ++;
-  }
-
-  T Pop() {
-    assert(!Empty());
-    size_t oldPos = head_;
-    head_ = (head_ + 1) % capacity_;
-    size_ --;
-    return circular_buffer_[oldPos];
-  }
-
- private:
-  size_t head_;
-  size_t tail_;
-  size_t size_;
-  const size_t capacity_;
-  vector<T> circular_buffer_;
-
-}; // class BoundedQueue
-} // namespace limonp
-
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/Closure.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Closure.hpp
@ -1,206 +0,0 @@
-#ifndef LIMONP_CLOSURE_HPP
-#define LIMONP_CLOSURE_HPP
-
-namespace limonp {
-
-class ClosureInterface {
- public:
-  virtual ~ClosureInterface() {
-  }
-  virtual void Run() = 0;
-};
-
-template <class Funct>
-class Closure0: public ClosureInterface {
- public:
-  Closure0(Funct fun) {
-    fun_ = fun;
-  }
-  virtual ~Closure0() {
-  }
-  virtual void Run() {
-    (*fun_)();
-  }
- private:
-  Funct fun_;
-}; 
-
-template <class Funct, class Arg1>
-class Closure1: public ClosureInterface {
- public:
-  Closure1(Funct fun, Arg1 arg1) {
-    fun_ = fun;
-    arg1_ = arg1;
-  }
-  virtual ~Closure1() {
-  }
-  virtual void Run() {
-    (*fun_)(arg1_);
-  }
- private:
-  Funct fun_;
-  Arg1 arg1_;
-}; 
-
-template <class Funct, class Arg1, class Arg2>
-class Closure2: public ClosureInterface {
- public:
-  Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
-    fun_ = fun;
-    arg1_ = arg1;
-    arg2_ = arg2;
-  }
-  virtual ~Closure2() {
-  }
-  virtual void Run() {
-    (*fun_)(arg1_, arg2_);
-  }
- private:
-  Funct fun_;
-  Arg1 arg1_;
-  Arg2 arg2_;
-}; 
-
-template <class Funct, class Arg1, class Arg2, class Arg3>
-class Closure3: public ClosureInterface {
- public:
-  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
-    fun_ = fun;
-    arg1_ = arg1;
-    arg2_ = arg2;
-    arg3_ = arg3;
-  }
-  virtual ~Closure3() {
-  }
-  virtual void Run() {
-    (*fun_)(arg1_, arg2_, arg3_);
-  }
- private:
-  Funct fun_;
-  Arg1 arg1_;
-  Arg2 arg2_;
-  Arg3 arg3_;
-}; 
-
-template <class Obj, class Funct> 
-class ObjClosure0: public ClosureInterface {
- public:
-  ObjClosure0(Obj* p, Funct fun) {
-   p_ = p;
-   fun_ = fun;
-  }
-  virtual ~ObjClosure0() {
-  }
-  virtual void Run() {
-    (p_->*fun_)();
-  }
- private:
-  Obj* p_;
-  Funct fun_;
-}; 
-
-template <class Obj, class Funct, class Arg1> 
-class ObjClosure1: public ClosureInterface {
- public:
-  ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
-   p_ = p;
-   fun_ = fun;
-   arg1_ = arg1;
-  }
-  virtual ~ObjClosure1() {
-  }
-  virtual void Run() {
-    (p_->*fun_)(arg1_);
-  }
- private:
-  Obj* p_;
-  Funct fun_;
-  Arg1 arg1_;
-}; 
-
-template <class Obj, class Funct, class Arg1, class Arg2> 
-class ObjClosure2: public ClosureInterface {
- public:
-  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
-   p_ = p;
-   fun_ = fun;
-   arg1_ = arg1;
-   arg2_ = arg2;
-  }
-  virtual ~ObjClosure2() {
-  }
-  virtual void Run() {
-    (p_->*fun_)(arg1_, arg2_);
-  }
- private:
-  Obj* p_;
-  Funct fun_;
-  Arg1 arg1_;
-  Arg2 arg2_;
-}; 
-template <class Obj, class Funct, class Arg1, class Arg2, class Arg3> 
-class ObjClosure3: public ClosureInterface {
- public:
-  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
-   p_ = p;
-   fun_ = fun;
-   arg1_ = arg1;
-   arg2_ = arg2;
-   arg3_ = arg3;
-  }
-  virtual ~ObjClosure3() {
-  }
-  virtual void Run() {
-    (p_->*fun_)(arg1_, arg2_, arg3_);
-  }
- private:
-  Obj* p_;
-  Funct fun_;
-  Arg1 arg1_;
-  Arg2 arg2_;
-  Arg3 arg3_;
-}; 
-
-template<class R>
-ClosureInterface* NewClosure(R (*fun)()) {
-  return new Closure0<R (*)()>(fun);
-}
-
-template<class R, class Arg1>
-ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
-  return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
-}
-
-template<class R, class Arg1, class Arg2>
-ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
-  return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
-}
-
-template<class R, class Arg1, class Arg2, class Arg3>
-ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
-  return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
-}
-
-template<class R, class Obj>
-ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
-  return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
-}
-
-template<class R, class Obj, class Arg1>
-ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
-  return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
-}
-
-template<class R, class Obj, class Arg1, class Arg2>
-ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
-  return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
-}
-
-template<class R, class Obj, class Arg1, class Arg2, class Arg3>
-ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
-  return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
-}
-
-} // namespace limonp
-
-#endif // LIMONP_CLOSURE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Colors.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Colors.hpp
@ -1,31 +0,0 @@
-#ifndef LIMONP_COLOR_PRINT_HPP
-#define LIMONP_COLOR_PRINT_HPP
-
-#include <string>
-#include <stdarg.h>
-
-namespace limonp {
-
-using std::string;
-
-enum Color {
-  BLACK = 30,
-  RED,
-  GREEN,
-  YELLOW,
-  BLUE,
-  PURPLE
-}; // enum Color
-
-static void ColorPrintln(enum Color color, const char * fmt, ...) {
-  va_list ap;
-  printf("\033[0;%dm", color);
-  va_start(ap, fmt);
-  vprintf(fmt, ap);
-  va_end(ap);
-  printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
-}
-
-} // namespace limonp
-
-#endif // LIMONP_COLOR_PRINT_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Condition.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Condition.hpp
@ -1,38 +0,0 @@
-#ifndef LIMONP_CONDITION_HPP
-#define LIMONP_CONDITION_HPP
-
-#include "MutexLock.hpp"
-
-namespace limonp {
-
-class Condition : NonCopyable {
- public:
-  explicit Condition(MutexLock& mutex)
-    : mutex_(mutex) {
-    XCHECK(!pthread_cond_init(&pcond_, NULL));
-  }
-
-  ~Condition() {
-    XCHECK(!pthread_cond_destroy(&pcond_));
-  }
-
-  void Wait() {
-    XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
-  }
-
-  void Notify() {
-    XCHECK(!pthread_cond_signal(&pcond_));
-  }
-
-  void NotifyAll() {
-    XCHECK(!pthread_cond_broadcast(&pcond_));
-  }
-
- private:
-  MutexLock& mutex_;
-  pthread_cond_t pcond_;
-}; // class Condition
-
-} // namespace limonp
-
-#endif // LIMONP_CONDITION_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Config.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Config.hpp
@ -1,103 +0,0 @@
-/************************************
- * file enc : utf8
- * author   : wuyanyi09@gmail.com
- ************************************/
-#ifndef LIMONP_CONFIG_H
-#define LIMONP_CONFIG_H
-
-#include <map>
-#include <fstream>
-#include <iostream>
-#include <assert.h>
-#include "StringUtil.hpp"
-
-namespace limonp {
-
-using namespace std;
-
-class Config {
- public:
-  explicit Config(const string& filePath) {
-    LoadFile(filePath);
-  }
-
-  operator bool () {
-    return !map_.empty();
-  }
-
-  string Get(const string& key, const string& defaultvalue) const {
-    map<string, string>::const_iterator it = map_.find(key);
-    if(map_.end() != it) {
-      return it->second;
-    }
-    return defaultvalue;
-  }
-  int Get(const string& key, int defaultvalue) const {
-    string str = Get(key, "");
-    if("" == str) {
-      return defaultvalue;
-    }
-    return atoi(str.c_str());
-  }
-  const char* operator [] (const char* key) const {
-    if(NULL == key) {
-      return NULL;
-    }
-    map<string, string>::const_iterator it = map_.find(key);
-    if(map_.end() != it) {
-      return it->second.c_str();
-    }
-    return NULL;
-  }
-
-  string GetConfigInfo() const {
-    string res;
-    res << *this;
-    return res;
-  }
-
- private:
-  void LoadFile(const string& filePath) {
-    ifstream ifs(filePath.c_str());
-    assert(ifs);
-    string line;
-    vector<string> vecBuf;
-    size_t lineno = 0;
-    while(getline(ifs, line)) {
-      lineno ++;
-      Trim(line);
-      if(line.empty() || StartsWith(line, "#")) {
-        continue;
-      }
-      vecBuf.clear();
-      Split(line, vecBuf, "=");
-      if(2 != vecBuf.size()) {
-        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
-        assert(false);
-        continue;
-      }
-      string& key = vecBuf[0];
-      string& value = vecBuf[1];
-      Trim(key);
-      Trim(value);
-      if(!map_.insert(make_pair(key, value)).second) {
-        fprintf(stderr, "key[%s] already exits.\n", key.c_str());
-        assert(false);
-        continue;
-      }
-    }
-    ifs.close();
-  }
-
-  friend ostream& operator << (ostream& os, const Config& config);
-
-  map<string, string> map_;
-}; // class Config
-
-inline ostream& operator << (ostream& os, const Config& config) {
-  return os << config.map_;
-}
-
-} // namespace limonp
-
-#endif // LIMONP_CONFIG_H
--- a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
@ -1,74 +0,0 @@
-#ifndef LIMONP_FILELOCK_HPP
-#define LIMONP_FILELOCK_HPP
-
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <string>
-#include <string.h>
-#include <assert.h>
-
-namespace limonp {
-
-using std::string;
-
-class FileLock {
- public:
-  FileLock() : fd_(-1), ok_(true) {
-  }
-  ~FileLock() {
-    if(fd_ > 0) {
-      Close();
-    }
-  }
-  void Open(const string& fname) {
-    assert(fd_ == -1);
-    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
-    if(fd_ < 0) {
-      ok_ = false;
-      err_ = strerror(errno);
-    }
-  }
-  void Close() {
-    ::close(fd_);
-  }
-  void Lock() {
-    if(LockOrUnlock(fd_, true) < 0) {
-      ok_ = false;
-      err_ = strerror(errno);
-    }
-  }
-  void UnLock() {
-    if(LockOrUnlock(fd_, false) < 0) {
-      ok_ = false;
-      err_ = strerror(errno);
-    }
-  }
-  bool Ok() const {
-    return ok_;
-  }
-  string Error() const {
-    return err_;
-  }
- private:
-  static int LockOrUnlock(int fd, bool lock) {
-    errno = 0;
-    struct flock f;
-    memset(&f, 0, sizeof(f));
-    f.l_type = (lock ? F_WRLCK : F_UNLCK);
-    f.l_whence = SEEK_SET;
-    f.l_start = 0;
-    f.l_len = 0;        // Lock/unlock entire file
-    return fcntl(fd, F_SETLK, &f);
-  }
-
-  int fd_;
-  bool ok_;
-  string err_;
-}; // class FileLock
-
-}// namespace limonp
-
-#endif // LIMONP_FILELOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
@ -1,7 +0,0 @@
-#ifndef LIMONP_FORCE_PUBLIC_H
-#define LIMONP_FORCE_PUBLIC_H
-
-#define private public
-#define protected public
-
-#endif // LIMONP_FORCE_PUBLIC_H
--- a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
@ -1,142 +0,0 @@
-#ifndef LIMONP_LOCAL_VECTOR_HPP
-#define LIMONP_LOCAL_VECTOR_HPP
-
-#include <iostream>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-
-namespace limonp {
-using namespace std;
-/*
- * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
- * LocalVector<T> is simple and not well-tested.
- */
-const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
-template <class T>
-class LocalVector {
- public:
-  typedef const T* const_iterator ;
-  typedef T value_type;
-  typedef size_t size_type;
- private:
-  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
-  T * ptr_;
-  size_t size_;
-  size_t capacity_;
- public:
-  LocalVector() {
-    init_();
-  };
-  LocalVector(const LocalVector<T>& vec) {
-    init_();
-    *this = vec;
-  }
-  LocalVector(const_iterator  begin, const_iterator end) { // TODO: make it faster
-    init_();
-    while(begin != end) {
-      push_back(*begin++);
-    }
-  }
-  LocalVector(size_t size, const T& t) { // TODO: make it faster
-    init_();
-    while(size--) {
-      push_back(t);
-    }
-  }
-  ~LocalVector() {
-    if(ptr_ != buffer_) {
-      free(ptr_);
-    }
-  };
- public:
-  LocalVector<T>& operator = (const LocalVector<T>& vec) {
-      if(this == &vec){
-          return *this;
-      }
-    clear();
-    size_ = vec.size();
-    capacity_ = vec.capacity();
-    if(vec.buffer_ == vec.ptr_) {
-      memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
-      ptr_ = buffer_;
-    } else {
-      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
-      assert(ptr_);
-      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
-    }
-    return *this;
-  }
- private:
-  void init_() {
-    ptr_ = buffer_;
-    size_ = 0;
-    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
-  }
- public:
-  T& operator [] (size_t i) {
-    return ptr_[i];
-  }
-  const T& operator [] (size_t i) const {
-    return ptr_[i];
-  }
-  void push_back(const T& t) {
-    if(size_ == capacity_) {
-      assert(capacity_);
-      reserve(capacity_ * 2);
-    }
-    ptr_[size_ ++ ] = t;
-  }
-  void reserve(size_t size) {
-    if(size <= capacity_) {
-      return;
-    }
-    T * next =  (T*)malloc(sizeof(T) * size);
-    assert(next);
-    T * old = ptr_;
-    ptr_ = next;
-    memcpy(ptr_, old, sizeof(T) * capacity_);
-    capacity_ = size;
-    if(old != buffer_) {
-      free(old);
-    }
-  }
-  bool empty() const {
-    return 0 == size();
-  }
-  size_t size() const {
-    return size_;
-  }
-  size_t capacity() const {
-    return capacity_;
-  }
-  const_iterator begin() const {
-    return ptr_;
-  }
-  const_iterator end() const {
-    return ptr_ + size_;
-  }
-  void clear() {
-    if(ptr_ != buffer_) {
-      free(ptr_);
-    }
-    init_();
-  }
-};
-
-template <class T>
-ostream & operator << (ostream& os, const LocalVector<T>& vec) {
-  if(vec.empty()) {
-    return os << "[]";
-  }
-  os<<"[\""<<vec[0];
-  for(size_t i = 1; i < vec.size(); i++) {
-    os<<"\", \""<<vec[i];
-  }
-  os<<"\"]";
-  return os;
-}
-
-}
-
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/Logging.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Logging.hpp
@ -1,77 +0,0 @@
-#ifndef LIMONP_LOGGING_HPP
-#define LIMONP_LOGGING_HPP
-
-#include <sstream>
-#include <iostream>
-#include <cassert>
-#include <cstdlib>
-#include <ctime>
-
-#ifdef XLOG
-#error "XLOG has been defined already"
-#endif // XLOG
-#ifdef XCHECK
-#error "XCHECK has been defined already"
-#endif // XCHECK
-
-#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
-#define XCHECK(exp) if(!(exp)) XLOG(FATAL) << "exp: ["#exp << "] false. "
-
-namespace limonp {
-
-enum {
-  LL_DEBUG = 0,
-  LL_INFO = 1,
-  LL_WARNING = 2,
-  LL_ERROR = 3,
-  LL_FATAL = 4,
-}; // enum
-
-static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
-
-class Logger {
- public:
-  Logger(size_t level, const char* filename, int lineno)
-   : level_(level) {
-#ifdef LOGGING_LEVEL
-     if (level_ < LOGGING_LEVEL) {
-       return;
-     }
-#endif
-    assert(level_ <= sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
-    char buf[32];
-    time_t now;
-    time(&now);
-    struct tm result;
-    localtime_r(&now, &result);
-    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
-    stream_ << buf
-      << " " << filename
-      << ":" << lineno
-      << " " << LOG_LEVEL_ARRAY[level_]
-      << " ";
-  }
-  ~Logger() {
-#ifdef LOGGING_LEVEL
-     if (level_ < LOGGING_LEVEL) {
-       return;
-     }
-#endif
-    std::cerr << stream_.str() << std::endl;
-    if (level_ == LL_FATAL) {
-      abort();
-    }
-  }
-
-  std::ostream& Stream() {
-    return stream_;
-  }
-
- private:
-  std::ostringstream stream_;
-  size_t level_;
-}; // class Logger
-
-} // namespace limonp
-
-#endif // LIMONP_LOGGING_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Md5.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Md5.hpp
@ -1,415 +0,0 @@
-/****************************************************************************
-**Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991
-**              2020, KylinSoft Co., Ltd.
-**All rights reserved.
-**
-**License to copy and use this software is granted provided that it
-**is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-**Algorithm" in all material mentioning or referencing this software
-**or this function.
-**
-**License is also granted to make and use derivative works provided
-**that such works are identified as "derived from the RSA Data
-**Security, Inc. MD5 Message-Digest Algorithm" in all material
-**mentioning or referencing the derived work.
-**
-**RSA Data Security, Inc. makes no representations concerning either
-**the merchantability of this software or the suitability of this
-**software for any particular purpose. It is provided "as is"
-**without express or implied warranty of any kind.
-**
-**These notices must be retained in any copies of any part of this
-**documentation and/or software.
-**
-**
-**
-**The original md5 implementation avoids external libraries.
-**This version has dependency on stdio.h for file input and
-**string.h for memcpy.
-**
-****************************************************************************/
-
-#ifndef __MD5_H__
-#define __MD5_H__
-#include <cstdio>
-#include <cstring>
-#include <iostream>
-
-namespace limonp {
-
-//#pragma region MD5 defines
-// Constants for MD5Transform routine.
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
-
-
-// F, G, H and I are basic MD5 functions.
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-#define I(x, y, z) ((y) ^ ((x) | (~z)))
-
-// ROTATE_LEFT rotates x left n bits.
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
-
-// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
-// Rotation is separate from addition to prevent recomputation.
-#define FF(a, b, c, d, x, s, ac) { \
-  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
-  (a) = ROTATE_LEFT ((a), (s)); \
-  (a) += (b); \
-  }
-#define GG(a, b, c, d, x, s, ac) { \
-  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
-  (a) = ROTATE_LEFT ((a), (s)); \
-  (a) += (b); \
-  }
-#define HH(a, b, c, d, x, s, ac) { \
-  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
-  (a) = ROTATE_LEFT ((a), (s)); \
-  (a) += (b); \
-  }
-#define II(a, b, c, d, x, s, ac) { \
-  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
-  (a) = ROTATE_LEFT ((a), (s)); \
-  (a) += (b); \
-  }
-//#pragma endregion
-
-
-typedef unsigned char BYTE ;
-
-// POINTER defines a generic pointer type
-typedef unsigned char *POINTER;
-
-// UINT2 defines a two byte word
-typedef unsigned short int UINT2;
-
-// UINT4 defines a four byte word
-typedef unsigned int UINT4;
-
-static unsigned char PADDING[64] = {
-    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-// convenient object that wraps
-// the C-functions for use in C++ only
-class MD5 {
-private:
-    struct __context_t {
-        UINT4 state[4];                                   /* state (ABCD) */
-        UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
-        unsigned char buffer[64];                         /* input buffer */
-    } context ;
-
-    //#pragma region static helper functions
-    // The core of the MD5 algorithm is here.
-    // MD5 basic transformation. Transforms state based on block.
-    static void MD5Transform(UINT4 state[4], unsigned char block[64]) {
-        UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
-
-        Decode(x, block, 64);
-
-        /* Round 1 */
-        FF(a, b, c, d, x[ 0], S11, 0xd76aa478);  /* 1 */
-        FF(d, a, b, c, x[ 1], S12, 0xe8c7b756);  /* 2 */
-        FF(c, d, a, b, x[ 2], S13, 0x242070db);  /* 3 */
-        FF(b, c, d, a, x[ 3], S14, 0xc1bdceee);  /* 4 */
-        FF(a, b, c, d, x[ 4], S11, 0xf57c0faf);  /* 5 */
-        FF(d, a, b, c, x[ 5], S12, 0x4787c62a);  /* 6 */
-        FF(c, d, a, b, x[ 6], S13, 0xa8304613);  /* 7 */
-        FF(b, c, d, a, x[ 7], S14, 0xfd469501);  /* 8 */
-        FF(a, b, c, d, x[ 8], S11, 0x698098d8);  /* 9 */
-        FF(d, a, b, c, x[ 9], S12, 0x8b44f7af);  /* 10 */
-        FF(c, d, a, b, x[10], S13, 0xffff5bb1);  /* 11 */
-        FF(b, c, d, a, x[11], S14, 0x895cd7be);  /* 12 */
-        FF(a, b, c, d, x[12], S11, 0x6b901122);  /* 13 */
-        FF(d, a, b, c, x[13], S12, 0xfd987193);  /* 14 */
-        FF(c, d, a, b, x[14], S13, 0xa679438e);  /* 15 */
-        FF(b, c, d, a, x[15], S14, 0x49b40821);  /* 16 */
-
-        /* Round 2 */
-        GG(a, b, c, d, x[ 1], S21, 0xf61e2562);  /* 17 */
-        GG(d, a, b, c, x[ 6], S22, 0xc040b340);  /* 18 */
-        GG(c, d, a, b, x[11], S23, 0x265e5a51);  /* 19 */
-        GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa);  /* 20 */
-        GG(a, b, c, d, x[ 5], S21, 0xd62f105d);  /* 21 */
-        GG(d, a, b, c, x[10], S22,  0x2441453);  /* 22 */
-        GG(c, d, a, b, x[15], S23, 0xd8a1e681);  /* 23 */
-        GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8);  /* 24 */
-        GG(a, b, c, d, x[ 9], S21, 0x21e1cde6);  /* 25 */
-        GG(d, a, b, c, x[14], S22, 0xc33707d6);  /* 26 */
-        GG(c, d, a, b, x[ 3], S23, 0xf4d50d87);  /* 27 */
-        GG(b, c, d, a, x[ 8], S24, 0x455a14ed);  /* 28 */
-        GG(a, b, c, d, x[13], S21, 0xa9e3e905);  /* 29 */
-        GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8);  /* 30 */
-        GG(c, d, a, b, x[ 7], S23, 0x676f02d9);  /* 31 */
-        GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);  /* 32 */
-
-        /* Round 3 */
-        HH(a, b, c, d, x[ 5], S31, 0xfffa3942);  /* 33 */
-        HH(d, a, b, c, x[ 8], S32, 0x8771f681);  /* 34 */
-        HH(c, d, a, b, x[11], S33, 0x6d9d6122);  /* 35 */
-        HH(b, c, d, a, x[14], S34, 0xfde5380c);  /* 36 */
-        HH(a, b, c, d, x[ 1], S31, 0xa4beea44);  /* 37 */
-        HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9);  /* 38 */
-        HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60);  /* 39 */
-        HH(b, c, d, a, x[10], S34, 0xbebfbc70);  /* 40 */
-        HH(a, b, c, d, x[13], S31, 0x289b7ec6);  /* 41 */
-        HH(d, a, b, c, x[ 0], S32, 0xeaa127fa);  /* 42 */
-        HH(c, d, a, b, x[ 3], S33, 0xd4ef3085);  /* 43 */
-        HH(b, c, d, a, x[ 6], S34,  0x4881d05);  /* 44 */
-        HH(a, b, c, d, x[ 9], S31, 0xd9d4d039);  /* 45 */
-        HH(d, a, b, c, x[12], S32, 0xe6db99e5);  /* 46 */
-        HH(c, d, a, b, x[15], S33, 0x1fa27cf8);  /* 47 */
-        HH(b, c, d, a, x[ 2], S34, 0xc4ac5665);  /* 48 */
-
-        /* Round 4 */
-        II(a, b, c, d, x[ 0], S41, 0xf4292244);  /* 49 */
-        II(d, a, b, c, x[ 7], S42, 0x432aff97);  /* 50 */
-        II(c, d, a, b, x[14], S43, 0xab9423a7);  /* 51 */
-        II(b, c, d, a, x[ 5], S44, 0xfc93a039);  /* 52 */
-        II(a, b, c, d, x[12], S41, 0x655b59c3);  /* 53 */
-        II(d, a, b, c, x[ 3], S42, 0x8f0ccc92);  /* 54 */
-        II(c, d, a, b, x[10], S43, 0xffeff47d);  /* 55 */
-        II(b, c, d, a, x[ 1], S44, 0x85845dd1);  /* 56 */
-        II(a, b, c, d, x[ 8], S41, 0x6fa87e4f);  /* 57 */
-        II(d, a, b, c, x[15], S42, 0xfe2ce6e0);  /* 58 */
-        II(c, d, a, b, x[ 6], S43, 0xa3014314);  /* 59 */
-        II(b, c, d, a, x[13], S44, 0x4e0811a1);  /* 60 */
-        II(a, b, c, d, x[ 4], S41, 0xf7537e82);  /* 61 */
-        II(d, a, b, c, x[11], S42, 0xbd3af235);  /* 62 */
-        II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb);  /* 63 */
-        II(b, c, d, a, x[ 9], S44, 0xeb86d391);  /* 64 */
-
-        state[0] += a;
-        state[1] += b;
-        state[2] += c;
-        state[3] += d;
-
-        // Zeroize sensitive information.
-        memset((POINTER)x, 0, sizeof(x));
-    }
-
-    // Encodes input (UINT4) into output (unsigned char). Assumes len is
-    // a multiple of 4.
-    static void Encode(unsigned char *output, UINT4 *input, unsigned int len) {
-        unsigned int i, j;
-
-        for(i = 0, j = 0; j < len; i++, j += 4) {
-            output[j] = (unsigned char)(input[i] & 0xff);
-            output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
-            output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
-            output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
-        }
-    }
-
-    // Decodes input (unsigned char) into output (UINT4). Assumes len is
-    // a multiple of 4.
-    static void Decode(UINT4 *output, unsigned char *input, unsigned int len) {
-        unsigned int i, j;
-
-        for(i = 0, j = 0; j < len; i++, j += 4)
-            output[i] = ((UINT4)input[j]) | (((UINT4)input[j + 1]) << 8) |
-                        (((UINT4)input[j + 2]) << 16) | (((UINT4)input[j + 3]) << 24);
-    }
-    //#pragma endregion
-
-
-public:
-    // MAIN FUNCTIONS
-    MD5() {
-        Init() ;
-    }
-
-    // MD5 initialization. Begins an MD5 operation, writing a new context.
-    void Init() {
-        context.count[0] = context.count[1] = 0;
-
-        // Load magic initialization constants.
-        context.state[0] = 0x67452301;
-        context.state[1] = 0xefcdab89;
-        context.state[2] = 0x98badcfe;
-        context.state[3] = 0x10325476;
-    }
-
-    // MD5 block update operation. Continues an MD5 message-digest
-    // operation, processing another message block, and updating the
-    // context.
-    void Update(
-        unsigned char *input,   // input block
-        unsigned int inputLen) {  // length of input block
-        unsigned int i, index, partLen;
-
-        // Compute number of bytes mod 64
-        index = (unsigned int)((context.count[0] >> 3) & 0x3F);
-
-        // Update number of bits
-        if((context.count[0] += ((UINT4)inputLen << 3))
-                < ((UINT4)inputLen << 3))
-            context.count[1]++;
-        context.count[1] += ((UINT4)inputLen >> 29);
-
-        partLen = 64 - index;
-
-        // Transform as many times as possible.
-        if(inputLen >= partLen) {
-            memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
-            MD5Transform(context.state, context.buffer);
-
-            for(i = partLen; i + 63 < inputLen; i += 64)
-                MD5Transform(context.state, &input[i]);
-
-            index = 0;
-        } else
-            i = 0;
-
-        /* Buffer remaining input */
-        memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen - i);
-    }
-
-    // MD5 finalization. Ends an MD5 message-digest operation, writing the
-    // the message digest and zeroizing the context.
-    // Writes to digestRaw
-    void Final() {
-        unsigned char bits[8];
-        unsigned int index, padLen;
-
-        // Save number of bits
-        Encode(bits, context.count, 8);
-
-        // Pad out to 56 mod 64.
-        index = (unsigned int)((context.count[0] >> 3) & 0x3f);
-        padLen = (index < 56) ? (56 - index) : (120 - index);
-        Update(PADDING, padLen);
-
-        // Append length (before padding)
-        Update(bits, 8);
-
-        // Store state in digest
-        Encode(digestRaw, context.state, 16);
-
-        // Zeroize sensitive information.
-        memset((POINTER)&context, 0, sizeof(context));
-
-        writeToString() ;
-    }
-
-    /// Buffer must be 32+1 (nul) = 33 chars long at least
-    void writeToString() {
-        int pos ;
-
-        for(pos = 0 ; pos < 16 ; pos++)
-            sprintf(digestChars + (pos * 2), "%02x", digestRaw[pos]) ;
-    }
-
-
-public:
-    // an MD5 digest is a 16-byte number (32 hex digits)
-    BYTE digestRaw[ 16 ] ;
-
-    // This version of the digest is actually
-    // a "printf'd" version of the digest.
-    char digestChars[ 33 ] ;
-
-    /// Load a file from disk and digest it
-    // Digests a file and returns the result.
-    const char* digestFile(const char *filename) {
-        if(NULL == filename || strcmp(filename, "") == 0)
-            return NULL;
-
-        Init() ;
-
-        FILE *file;
-
-        unsigned char buffer[1024] ;
-
-        if((file = fopen(filename, "rb")) == NULL) {
-            return NULL;
-        }
-        int len;
-        while((len = fread(buffer, 1, 1024, file)))
-            Update(buffer, len) ;
-        Final();
-
-        fclose(file);
-
-        return digestChars ;
-    }
-
-    /// Digests a byte-array already in memory
-    const char* digestMemory(BYTE *memchunk, int len) {
-        if(NULL == memchunk)
-            return NULL;
-
-        Init() ;
-        Update(memchunk, len) ;
-        Final() ;
-
-        return digestChars ;
-    }
-
-    // Digests a string and prints the result.
-    const char* digestString(const char *string) {
-        if(string == NULL)
-            return NULL;
-
-        Init() ;
-        Update((unsigned char*)string, strlen(string)) ;
-        Final() ;
-
-        return digestChars ;
-    }
-};
-
-inline bool md5String(const char* str, std::string& res) {
-    if(NULL == str) {
-        res = "";
-        return false;
-    }
-
-    MD5 md5;
-    const char *pRes = md5.digestString(str);
-    if(NULL == pRes) {
-        res = "";
-        return false;
-    }
-
-    res = pRes;
-    return true;
-}
-
-inline bool md5File(const char* filepath, std::string& res) {
-    if(NULL == filepath || strcmp(filepath, "") == 0) {
-        res = "";
-        return false;
-    }
-
-    MD5 md5;
-    const char *pRes = md5.digestFile(filepath);
-
-    if(NULL == pRes) {
-        res = "";
-        return false;
-    }
-
-    res = pRes;
-    return true;
-}
-}
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
@ -1,51 +0,0 @@
-#ifndef LIMONP_MUTEX_LOCK_HPP
-#define LIMONP_MUTEX_LOCK_HPP
-
-#include <pthread.h>
-#include "NonCopyable.hpp"
-#include "Logging.hpp"
-
-namespace limonp {
-
-class MutexLock: NonCopyable {
- public:
-  MutexLock() {
-    XCHECK(!pthread_mutex_init(&mutex_, NULL));
-  }
-  ~MutexLock() {
-    XCHECK(!pthread_mutex_destroy(&mutex_));
-  }
-  pthread_mutex_t* GetPthreadMutex() {
-    return &mutex_;
-  }
-
- private:
-  void Lock() {
-    XCHECK(!pthread_mutex_lock(&mutex_));
-  }
-  void Unlock() {
-    XCHECK(!pthread_mutex_unlock(&mutex_));
-  }
-  friend class MutexLockGuard;
-
-  pthread_mutex_t mutex_;
-}; // class MutexLock
-
-class MutexLockGuard: NonCopyable {
- public:
-  explicit MutexLockGuard(MutexLock & mutex)
-    : mutex_(mutex) {
-    mutex_.Lock();
-  }
-  ~MutexLockGuard() {
-    mutex_.Unlock();
-  }
- private:
-  MutexLock & mutex_;
-}; // class MutexLockGuard
-
-#define MutexLockGuard(x) XCHECK(false);
-
-} // namespace limonp
-
-#endif // LIMONP_MUTEX_LOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
@ -1,21 +0,0 @@
-/************************************
- ************************************/
-#ifndef LIMONP_NONCOPYABLE_H
-#define LIMONP_NONCOPYABLE_H
-
-namespace limonp {
-
-class NonCopyable {
- protected:
-  NonCopyable() {
-  }
-  ~NonCopyable() {
-  }
- private:
-  NonCopyable(const NonCopyable& );
-  const NonCopyable& operator=(const NonCopyable& );
-}; // class NonCopyable
-
-} // namespace limonp
-
-#endif // LIMONP_NONCOPYABLE_H
--- a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
@ -1,157 +0,0 @@
-#ifndef LIMONP_STD_EXTEMSION_HPP
-#define LIMONP_STD_EXTEMSION_HPP
-
-#include <map>
-
-#ifdef __APPLE__
-#include <unordered_map>
-#include <unordered_set>
-#elif(__cplusplus >= 201103L)
-#include <unordered_map>
-#include <unordered_set>
-#elif defined _MSC_VER
-#include <unordered_map>
-#include <unordered_set>
-#else
-#include <tr1/unordered_map>
-#include <tr1/unordered_set>
-namespace std {
-using std::tr1::unordered_map;
-using std::tr1::unordered_set;
-}
-
-#endif
-
-#include <set>
-#include <string>
-#include <vector>
-#include <deque>
-#include <fstream>
-#include <sstream>
-
-namespace std {
-
-template<typename T>
-ostream& operator << (ostream& os, const vector<T>& v) {
-  if(v.empty()) {
-    return os << "[]";
-  }
-  os<<"["<<v[0];
-  for(size_t i = 1; i < v.size(); i++) {
-    os<<", "<<v[i];
-  }
-  os<<"]";
-  return os;
-}
-
-template<>
-inline ostream& operator << (ostream& os, const vector<string>& v) {
-  if(v.empty()) {
-    return os << "[]";
-  }
-  os<<"[\""<<v[0];
-  for(size_t i = 1; i < v.size(); i++) {
-    os<<"\", \""<<v[i];
-  }
-  os<<"\"]";
-  return os;
-}
-
-template<typename T>
-ostream& operator << (ostream& os, const deque<T>& dq) {
-  if(dq.empty()) {
-    return os << "[]";
-  }
-  os<<"[\""<<dq[0];
-  for(size_t i = 1; i < dq.size(); i++) {
-    os<<"\", \""<<dq[i];
-  }
-  os<<"\"]";
-  return os;
-}
-
-
-template<class T1, class T2>
-ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
-  os << pr.first << ":" << pr.second ;
-  return os;
-}
-
-
-template<class T>
-string& operator << (string& str, const T& obj) {
-  stringstream ss;
-  ss << obj; // call ostream& operator << (ostream& os,
-  return str = ss.str();
-}
-
-template<class T1, class T2>
-ostream& operator << (ostream& os, const map<T1, T2>& mp) {
-  if(mp.empty()) {
-    os<<"{}";
-    return os;
-  }
-  os<<'{';
-  typename map<T1, T2>::const_iterator it = mp.begin();
-  os<<*it;
-  it++;
-  while(it != mp.end()) {
-    os<<", "<<*it;
-    it++;
-  }
-  os<<'}';
-  return os;
-}
-template<class T1, class T2>
-ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
-  if(mp.empty()) {
-    return os << "{}";
-  }
-  os<<'{';
-  typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
-  os<<*it;
-  it++;
-  while(it != mp.end()) {
-    os<<", "<<*it++;
-  }
-  return os<<'}';
-}
-
-template<class T>
-ostream& operator << (ostream& os, const set<T>& st) {
-  if(st.empty()) {
-    os << "{}";
-    return os;
-  }
-  os<<'{';
-  typename set<T>::const_iterator it = st.begin();
-  os<<*it;
-  it++;
-  while(it != st.end()) {
-    os<<", "<<*it;
-    it++;
-  }
-  os<<'}';
-  return os;
-}
-
-template<class KeyType, class ContainType>
-bool IsIn(const ContainType& contain, const KeyType& key) {
-  return contain.end() != contain.find(key);
-}
-
-template<class T>
-basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
-  return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
-}
-
-template<class T>
-ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
-  ostreambuf_iterator<T> itr (ofs);
-  copy(s.begin(), s.end(), itr);
-  return ofs;
-}
-
-} // namespace std
-
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
@ -1,382 +0,0 @@
-/************************************
- * file enc : ascii
- * author   : wuyanyi09@gmail.com
- ************************************/
-#ifndef LIMONP_STR_FUNCTS_H
-#define LIMONP_STR_FUNCTS_H
-#include <stdint.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <memory.h>
-#include <sys/types.h>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <cctype>
-#include <map>
-#include <functional>
-#include <locale>
-#include <sstream>
-#include <iterator>
-#include <algorithm>
-#include "StdExtension.hpp"
-
-namespace limonp {
-using namespace std;
-inline string StringFormat(const char* fmt, ...) {
-  int size = 256;
-  std::string str;
-  va_list ap;
-  while (1) {
-    str.resize(size);
-    va_start(ap, fmt);
-    int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
-    va_end(ap);
-    if (n > -1 && n < size) {
-      str.resize(n);
-      return str;
-    }
-    if (n > -1)
-      size = n + 1;
-    else
-      size *= 2;
-  }
-  return str;
-}
-
-template<class T>
-void Join(T begin, T end, string& res, const string& connector) {
-  if(begin == end) {
-    return;
-  }
-  stringstream ss;
-  ss<<*begin;
-  begin++;
-  while(begin != end) {
-    ss << connector << *begin;
-    begin ++;
-  }
-  res = ss.str();
-}
-
-template<class T>
-string Join(T begin, T end, const string& connector) {
-  string res;
-  Join(begin ,end, res, connector);
-  return res;
-}
-
-inline string& Upper(string& str) {
-  transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
-  return str;
-}
-
-inline string& Lower(string& str) {
-  transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
-  return str;
-}
-
-inline bool IsSpace(unsigned c) {
-  // when passing large int as the argument of isspace, it core dump, so here need a type cast.
-  return c > 0xff ? false : std::isspace(c & 0xff);
-}
-
-inline std::string& LTrim(std::string &s) {
-  s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
-  return s;
-}
-
-inline std::string& RTrim(std::string &s) {
-  s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
-  return s;
-}
-
-inline std::string& Trim(std::string &s) {
-  return LTrim(RTrim(s));
-}
-
-inline std::string& LTrim(std::string & s, char x) {
-  s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
-  return s;
-}
-
-inline std::string& RTrim(std::string & s, char x) {
-  s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
-  return s;
-}
-
-inline std::string& Trim(std::string &s, char x) {
-  return LTrim(RTrim(s, x), x);
-}
-
-inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
-  res.clear();
-  size_t Start = 0;
-  size_t end = 0;
-  string sub;
-  while(Start < src.size()) {
-    end = src.find_first_of(pattern, Start);
-    if(string::npos == end || res.size() >= maxsplit) {
-      sub = src.substr(Start);
-      res.push_back(sub);
-      return;
-    }
-    sub = src.substr(Start, end - Start);
-    res.push_back(sub);
-    Start = end + 1;
-  }
-  return;
-}
-
-inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
-  vector<string> res;
-  Split(src, res, pattern, maxsplit);
-  return res;
-}
-
-inline bool StartsWith(const string& str, const string& prefix) {
-  if(prefix.length() > str.length()) {
-    return false;
-  }
-  return 0 == str.compare(0, prefix.length(), prefix);
-}
-
-inline bool EndsWith(const string& str, const string& suffix) {
-  if(suffix.length() > str.length()) {
-    return false;
-  }
-  return 0 == str.compare(str.length() -  suffix.length(), suffix.length(), suffix);
-}
-
-inline bool IsInStr(const string& str, char ch) {
-  return str.find(ch) != string::npos;
-}
-
-inline uint16_t TwocharToUint16(char high, char low) {
-  return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
-}
-
-template <class Uint16Container>
-bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
-  if(!str) {
-    return false;
-  }
-  char ch1, ch2;
-  uint16_t tmp;
-  vec.clear();
-  for(size_t i = 0; i < len;) {
-    if(!(str[i] & 0x80)) { // 0xxxxxxx
-      vec.push_back(str[i]);
-      i++;
-    } else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
-      ch1 = (str[i] >> 2) & 0x07;
-      ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
-      tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
-      vec.push_back(tmp);
-      i += 2;
-    } else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
-      ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
-      ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
-      tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
-      vec.push_back(tmp);
-      i += 3;
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <class Uint16Container>
-bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
-  return Utf8ToUnicode(str.c_str(), str.size(), vec);
-}
-
-template <class Uint32Container>
-bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
-  uint32_t tmp;
-  vec.clear();
-  for(size_t i = 0; i < size;) {
-    if(!(str[i] & 0x80)) { // 0xxxxxxx
-      // 7bit, total 7bit
-      tmp = (uint8_t)(str[i]) & 0x7f;
-      i++;
-    } else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxxx
-      // 5bit, total 5bit
-      tmp = (uint8_t)(str[i]) & 0x1f;
-
-      // 6bit, total 11bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+1]) & 0x3f;
-      i += 2;
-    } else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxxxx
-      // 4bit, total 4bit
-      tmp = (uint8_t)(str[i]) & 0x0f;
-
-      // 6bit, total 10bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+1]) & 0x3f;
-
-      // 6bit, total 16bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+2]) & 0x3f;
-
-      i += 3;
-    } else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 11110xxxx
-      // 3bit, total 3bit
-      tmp = (uint8_t)(str[i]) & 0x07;
-
-      // 6bit, total 9bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+1]) & 0x3f;
-
-      // 6bit, total 15bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+2]) & 0x3f;
-
-      // 6bit, total 21bit
-      tmp <<= 6;
-      tmp |= (uint8_t)(str[i+3]) & 0x3f;
-
-      i += 4;
-    } else {
-      return false;
-    }
-    vec.push_back(tmp);
-  }
-  return true;
-}
-
-template <class Uint32Container>
-bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
-    return Utf8ToUnicode32(str.data(), str.size(), vec);
-}
-
-inline int UnicodeToUtf8Bytes(uint32_t ui){
-    if(ui <= 0x7f) {
-        return 1;
-    } else if(ui <= 0x7ff) {
-        return 2;
-    } else if(ui <= 0xffff) {
-        return 3;
-    } else {
-        return 4;
-    }
-}
-
-template <class Uint32ContainerConIter>
-void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
-  res.clear();
-  uint32_t ui;
-  while(begin != end) {
-    ui = *begin;
-    if(ui <= 0x7f) {
-      res += char(ui);
-    } else if(ui <= 0x7ff) {
-      res += char(((ui >> 6) & 0x1f) | 0xc0);
-      res += char((ui & 0x3f) | 0x80);
-    } else if(ui <= 0xffff) {
-      res += char(((ui >> 12) & 0x0f) | 0xe0);
-      res += char(((ui >> 6) & 0x3f) | 0x80);
-      res += char((ui & 0x3f) | 0x80);
-    } else {
-      res += char(((ui >> 18) & 0x03) | 0xf0);
-      res += char(((ui >> 12) & 0x3f) | 0x80);
-      res += char(((ui >> 6) & 0x3f) | 0x80);
-      res += char((ui & 0x3f) | 0x80);
-    }
-    begin ++;
-  }
-}
-
-template <class Uint16ContainerConIter>
-void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
-  res.clear();
-  uint16_t ui;
-  while(begin != end) {
-    ui = *begin;
-    if(ui <= 0x7f) {
-      res += char(ui);
-    } else if(ui <= 0x7ff) {
-      res += char(((ui>>6) & 0x1f) | 0xc0);
-      res += char((ui & 0x3f) | 0x80);
-    } else {
-      res += char(((ui >> 12) & 0x0f )| 0xe0);
-      res += char(((ui>>6) & 0x3f )| 0x80 );
-      res += char((ui & 0x3f) | 0x80);
-    }
-    begin ++;
-  }
-}
-
-
-template <class Uint16Container>
-bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
-  vec.clear();
-  if(!str) {
-    return true;
-  }
-  size_t i = 0;
-  while(i < len) {
-    if(0 == (str[i] & 0x80)) {
-      vec.push_back(uint16_t(str[i]));
-      i++;
-    } else {
-      if(i + 1 < len) { //&& (str[i+1] & 0x80))
-        uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
-        vec.push_back(tmp);
-        i += 2;
-      } else {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-template <class Uint16Container>
-bool GBKTrans(const string& str, Uint16Container& vec) {
-  return GBKTrans(str.c_str(), str.size(), vec);
-}
-
-template <class Uint16ContainerConIter>
-void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
-  res.clear();
-  //pair<char, char> pa;
-  char first, second;
-  while(begin != end) {
-    //pa = uint16ToChar2(*begin);
-    first = ((*begin)>>8) & 0x00ff;
-    second = (*begin) & 0x00ff;
-    if(first & 0x80) {
-      res += first;
-      res += second;
-    } else {
-      res += second;
-    }
-    begin++;
-  }
-}
-
-/*
- * format example: "%Y-%m-%d %H:%M:%S"
- */
-// inline void GetTime(const string& format, string&  timeStr) {
-//   time_t timeNow;
-//   time(&timeNow);
-//   timeStr.resize(64);
-//   size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
-//   timeStr.resize(len);
-// }
-
-inline string PathJoin(const string& path1, const string& path2) {
-  if(EndsWith(path1, "/")) {
-    return path1 + path2;
-  }
-  return path1 + "/" + path2;
-}
-
-}
-#endif
--- a/libchinese-segmentation/cppjieba/limonp/Thread.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Thread.hpp
@ -1,44 +0,0 @@
-#ifndef LIMONP_THREAD_HPP
-#define LIMONP_THREAD_HPP
-
-#include "Logging.hpp"
-#include "NonCopyable.hpp"
-
-namespace limonp {
-
-class IThread: NonCopyable {
- public:
-  IThread(): isStarted(false), isJoined(false) {
-  }
-  virtual ~IThread() {
-    if(isStarted && !isJoined) {
-      XCHECK(!pthread_detach(thread_));
-    }
-  };
-
-  virtual void Run() = 0;
-  void Start() {
-    XCHECK(!isStarted);
-    XCHECK(!pthread_create(&thread_, NULL, Worker, this));
-    isStarted = true;
-  }
-  void Join() {
-    XCHECK(!isJoined);
-    XCHECK(!pthread_join(thread_, NULL));
-    isJoined = true;
-  }
- private:
-  static void * Worker(void * data) {
-    IThread * ptr = (IThread* ) data;
-    ptr->Run();
-    return NULL;
-  }
-
-  pthread_t thread_;
-  bool isStarted;
-  bool isJoined;
-}; // class IThread
-
-} // namespace limonp
-
-#endif // LIMONP_THREAD_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
@ -1,86 +0,0 @@
-#ifndef LIMONP_THREAD_POOL_HPP
-#define LIMONP_THREAD_POOL_HPP
-
-#include "Thread.hpp"
-#include "BlockingQueue.hpp"
-#include "BoundedBlockingQueue.hpp"
-#include "Closure.hpp"
-
-namespace limonp {
-
-using namespace std;
-
-//class ThreadPool;
-class ThreadPool: NonCopyable {
- public:
-  class Worker: public IThread {
-   public:
-    Worker(ThreadPool* pool): ptThreadPool_(pool) {
-      assert(ptThreadPool_);
-    }
-    virtual ~Worker() {
-    }
-
-    virtual void Run() {
-      while (true) {
-        ClosureInterface* closure = ptThreadPool_->queue_.Pop();
-        if (closure == NULL) {
-          break;
-        }
-        try {
-          closure->Run();
-        } catch(std::exception& e) {
-          XLOG(ERROR) << e.what();
-        } catch(...) {
-          XLOG(ERROR) << " unknown exception.";
-        }
-        delete closure;
-      }
-    }
-   private:
-    ThreadPool * ptThreadPool_;
-  }; // class Worker
-
-  ThreadPool(size_t thread_num)
-    : threads_(thread_num), 
-      queue_(thread_num) {
-    assert(thread_num);
-    for(size_t i = 0; i < threads_.size(); i ++) {
-      threads_[i] = new Worker(this);
-    }
-  }
-  ~ThreadPool() {
-    Stop();
-  }
-
-  void Start() {
-    for(size_t i = 0; i < threads_.size(); i++) {
-      threads_[i]->Start();
-    }
-  }
-  void Stop() {
-    for(size_t i = 0; i < threads_.size(); i ++) {
-      queue_.Push(NULL);
-    }
-    for(size_t i = 0; i < threads_.size(); i ++) {
-      threads_[i]->Join();
-      delete threads_[i];
-    }
-    threads_.clear();
-  }
-
-  void Add(ClosureInterface* task) {
-    assert(task);
-    queue_.Push(task);
-  }
-
- private:
-  friend class Worker;
-
-  vector<IThread*> threads_;
-  BoundedBlockingQueue<ClosureInterface*> queue_;
-}; // class ThreadPool
-
-} // namespace limonp
-
-#endif // LIMONP_THREAD_POOL_HPP
--- a/libchinese-segmentation/cppjieba/limonp/limonp.pri
+++ b/libchinese-segmentation/cppjieba/limonp/limonp.pri
@ -1,22 +0,0 @@
-INCLUDEPATH += $$PWD
-
-HEADERS += \
-    $$PWD/ArgvContext.hpp \
-    $$PWD/BlockingQueue.hpp \
-    $$PWD/BoundedBlockingQueue.hpp \
-    $$PWD/BoundedQueue.hpp \
-    $$PWD/Closure.hpp \
-    $$PWD/Colors.hpp \
-    $$PWD/Condition.hpp \
-    $$PWD/Config.hpp \
-    $$PWD/FileLock.hpp \
-    $$PWD/ForcePublic.hpp \
-    $$PWD/LocalVector.hpp \
-    $$PWD/Logging.hpp \
-    $$PWD/Md5.hpp \
-    $$PWD/MutexLock.hpp \
-    $$PWD/NonCopyable.hpp \
-    $$PWD/StdExtension.hpp \
-    $$PWD/StringUtil.hpp \
-    $$PWD/Thread.hpp \
-    $$PWD/ThreadPool.hpp
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.cpp
@ -1,275 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#include <cmath>
-#include "segment-trie.h"
-
-DictTrie::DictTrie(const vector<string> file_paths, string dat_cache_path)
-    : StorageBase<DatMemElem, false, DictCacheFileHeader>(file_paths, dat_cache_path)
-{
-    this->Init();
-}
-
-DictTrie::DictTrie(const string &dict_path, const string &user_dict_paths, const string &dat_cache_path)
-    : StorageBase<DatMemElem, false, DictCacheFileHeader>(vector<string>{dict_path, user_dict_paths}, dat_cache_path)
-{
-    this->Init();
-}
-
-void DictTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
-{
-    DictCacheFileHeader header;
-    assert(sizeof(header.md5_hex) == md5.size());
-    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
-    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-    umask(S_IWGRP | S_IWOTH);
-    const int fd =mkstemp((char *)tmp_filepath.data());
-    assert(fd >= 0);
-    fchmod(fd, 0644);
-
-    write_bytes = write(fd, (const char *)&header, sizeof(DictCacheFileHeader));
-
-    this->PreLoad();
-    this->LoadDefaultDict(fd, write_bytes, offset, elements_num);
-    this->LoadUserDict(fd, write_bytes, offset, elements_num);
-
-    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
-
-    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
-    write(fd, &elements_num, sizeof(int));
-    write(fd, &offset, sizeof(int));
-    data_trie_size = this->GetDataTrieSize();
-    write(fd, &data_trie_size, sizeof(int));
-    write(fd, &m_min_weight, sizeof(double));
-
-    close(fd);
-    assert((size_t)write_bytes == sizeof(DictCacheFileHeader) + offset + this->GetDataTrieTotalSize());
-
-    tryRename(tmp_filepath, dat_cache_file);
-}
-
-const DatMemElem * DictTrie::Find(const string &key) const
-{
-    int result = this->ExactMatchSearch(key.c_str(), key.size());
-    if (result < 0)
-        return nullptr;
-    return &this->GetElementPtr()[result];
-}
-
-
-
-void DictTrie::FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<DatDag> &res, size_t max_word_len) const {
-
-    res.clear();
-    res.resize(end - begin);
-
-    string text_str;
-    EncodeRunesToString(begin, end, text_str);
-
-    static const size_t max_num = 128;
-    result_pair_type result_pairs[max_num] = {};
-
-    for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
-
-        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
-
-        res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
-
-        for (std::size_t idx = 0; idx < num_results; ++idx) {
-            auto & match = result_pairs[idx];
-
-            if ((match.value < 0) || ((size_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) {
-                continue;
-            }
-
-            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
-
-            if (char_num > max_word_len) {
-                continue;
-            }
-
-            const DatMemElem * pValue = &this->GetElementPtr()[match.value];
-
-            if (1 == char_num) {
-                res[i].nexts[0].second = pValue;
-                continue;
-            }
-
-            res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
-        }
-
-        begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
-    }
-}
-
-void DictTrie::FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange> &words, size_t max_word_len) const {
-
-    string text_str;
-    EncodeRunesToString(begin, end, text_str);
-
-    static const size_t max_num = 128;
-    result_pair_type result_pairs[max_num] = {};//存放字典查询结果
-    size_t str_size = end - begin;
-    double max_weight[str_size];//存放逆向路径最大weight
-    for (size_t i = 0; i<str_size; i++) {
-        max_weight[i] = -3.14e+100;
-    }
-    size_t max_next[str_size];//存放动态规划后的分词结果
-    //memset(max_next,-1,str_size*sizeof(size_t));
-
-    double val(0);
-    for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
-        size_t nextPos = str_size - i;//逆向计算
-        begin_pos -= (end - i - 1)->len;
-
-        std::size_t num_results = this->CommonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
-        if (0 == num_results) {//字典不存在则单独分词
-            val = GetMinWeight();
-            if (nextPos  < str_size) {
-                val += max_weight[nextPos];
-            }
-            if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
-                max_weight[nextPos - 1] = val;
-                max_next[nextPos - 1] = nextPos;
-            }
-        } else {//字典存在则根据查询结果数量计算最大概率路径
-            for (std::size_t idx = 0; idx < num_results; ++idx) {
-                auto & match = result_pairs[idx];
-                if ((match.value < 0) || ((uint32_t)match.value >= this->GetCacheFileHeaderPtr()->elements_size)) {
-                    continue;
-                }
-                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
-                if (char_num > max_word_len) {
-                    continue;
-                }
-                auto * pValue = &this->GetElementPtr()[match.value];
-
-                val = pValue->weight;
-                if (1 == char_num) {
-                    if (nextPos  < str_size) {
-                        val += max_weight[nextPos];
-                    }
-                    if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
-                        max_weight[nextPos - 1] = val;
-                        max_next[nextPos - 1] = nextPos;
-                    }
-                } else {
-                    if (nextPos - 1 + char_num  < str_size) {
-                        val += max_weight[nextPos - 1 + char_num];
-                    }
-                    if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) {
-                        max_weight[nextPos - 1] = val;
-                        max_next[nextPos - 1] = nextPos - 1 + char_num;
-                    }
-                }
-            }
-        }
-    }
-    for (size_t i = 0; i < str_size;) {//统计动态规划结果
-        assert(max_next[i] > i);
-        assert(max_next[i] <= str_size);
-        WordRange wr(begin + i, begin + max_next[i] - 1);
-        words.push_back(wr);
-        i = max_next[i];
-    }
-}
-
-bool DictTrie::IsUserDictSingleChineseWord(const Rune &word) const {
-    return IsIn(m_user_dict_single_chinese_word, word);
-}
-
-void DictTrie::PreLoad()
-{
-    ifstream ifs(DICT_PATH);
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, " ");
-        if (buf.size() != 3)
-            continue;
-        m_freq_sum += atof(buf[1].c_str());
-    }
-}
-
-void DictTrie::LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
-{
-    ifstream ifs(DICT_PATH);
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, " ");
-        if (buf.size() != 3)
-            continue;
-        DatMemElem node_info;
-        node_info.weight = log(atof(buf[1].c_str()) / m_freq_sum);
-        node_info.SetTag(buf[2]);
-        this->Update(buf[0].c_str(), buf[0].size(), elements_num);
-        offset += (sizeof(DatMemElem));
-        elements_num++;
-        if (m_min_weight > node_info.weight) {
-            m_min_weight = node_info.weight;
-        }
-        write_bytes += write(fd, &node_info, sizeof(DatMemElem));
-    }
-}
-
-void DictTrie::LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
-{
-    ifstream ifs(USER_DICT_PATH);
-    string line;
-    vector<string> buf;
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, " ");
-        if (buf.size() != 3)
-            continue;
-        DatMemElem node_info;
-        assert(m_freq_sum > 0.0);
-        const int freq = atoi(buf[1].c_str());
-        node_info.weight = log(1.0 * freq / m_freq_sum);
-        node_info.SetTag(buf[2]);
-        this->Update(buf[0].c_str(), buf[0].size(), elements_num);
-        offset += (sizeof(DatMemElem));
-        elements_num++;
-        write_bytes += write(fd, &node_info, sizeof(DatMemElem));
-        if (Utf8CharNum(buf[0]) == 1) {
-            RuneArray word;
-            if (DecodeRunesInString(buf[0], word)) {
-                m_user_dict_single_chinese_word.insert(word[0]);
-            }
-        }
-    }
-}
-
-inline double DictTrie::GetMinWeight() const
-{
-    return this->GetCacheFileHeaderPtr()->min_weight;
-}
--- a/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
+++ b/libchinese-segmentation/cppjieba/segment-trie/segment-trie.h
@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#ifndef SegmentTrie_H
-#define SegmentTrie_H
-
-#include "storage-base.hpp"
-#include "cppjieba/Unicode.hpp"
-
-using namespace cppjieba;
-
-const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
-const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
-
-struct DictCacheFileHeader : CacheFileHeaderBase
-{
-    double min_weight = 0;
-};
-
-class DictTrie : public StorageBase<DatMemElem, false, DictCacheFileHeader>
-{
-public:
-    DictTrie(const vector<string> file_paths, string dat_cache_path = "");
-    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "");
-    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
-
-    const DatMemElem *Find(const string &key) const;
-    void FindDatDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
-              vector<struct DatDag>&res, size_t max_word_len = MAX_WORD_LENGTH) const;
-    void FindWordRange(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
-              vector<WordRange>& words, size_t max_word_len = MAX_WORD_LENGTH) const;
-    bool IsUserDictSingleChineseWord(const Rune& word) const;
-
-private:
-    DictTrie();
-    void PreLoad();
-    void LoadDefaultDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
-    void LoadUserDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
-    double GetMinWeight() const;
-
-    double m_freq_sum = 0.0;
-    double m_min_weight = 3.14e+100;
-    unordered_set<Rune> m_user_dict_single_chinese_word;
-};
-
-#endif // SegmentTrie_H
--- a/libchinese-segmentation/development-files/header-files/ChineseSegmentation
+++ b/libchinese-segmentation/development-files/header-files/ChineseSegmentation
@ -1 +0,0 @@
-#include "chinese-segmentation.h"
--- a/libchinese-segmentation/development-files/header-files/HanZiToPinYin
+++ b/libchinese-segmentation/development-files/header-files/HanZiToPinYin
@ -1 +0,0 @@
-#include "hanzi-to-pinyin.h"
--- a/libchinese-segmentation/dict/README.md
+++ b/libchinese-segmentation/dict/README.md
@ -1,31 +0,0 @@
-# CppJieba字典
-
-文件后缀名代表的是词典的编码方式。
-比如filename.utf8 是 utf8编码，filename.gbk 是 gbk编码方式。
-
-
-## 分词
-
-### jieba.dict.utf8/gbk
-
-作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
-
-### hmm_model.utf8/gbk
-
-作为隐马尔可夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
-
-__对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
-
-
-## 关键词抽取
-
-### idf.utf8
-
-IDF(Inverse Document Frequency)
-在KeywordExtractor中，使用的是经典的TF-IDF算法，所以需要这么一个词典提供IDF信息。
-
-### stop_words.utf8
-
-停用词词典
-
-
--- a/libchinese-segmentation/dict/hmm_model.utf8
+++ b/libchinese-segmentation/dict/hmm_model.utf8
--- a/libchinese-segmentation/dict/idf.utf8
+++ b/libchinese-segmentation/dict/idf.utf8
--- a/libchinese-segmentation/dict/jieba.dict.utf8
+++ b/libchinese-segmentation/dict/jieba.dict.utf8
--- a/libchinese-segmentation/dict/pinyinWithoutTone.txt
+++ b/libchinese-segmentation/dict/pinyinWithoutTone.txt
--- a/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
+++ b/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_start.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_start.utf8
@ -1,259 +0,0 @@
-#初始状态的概率
-#格式
-#状态:概率
-B,a:-4.7623052146
-B,ad:-6.68006603678
-B,ag:-3.14e+100
-B,an:-8.69708322302
-B,b:-5.01837436211
-B,bg:-3.14e+100
-B,c:-3.42388018495
-B,d:-3.97504752976
-B,df:-8.88897423083
-B,dg:-3.14e+100
-B,e:-8.56355183039
-B,en:-3.14e+100
-B,f:-5.49163041848
-B,g:-3.14e+100
-B,h:-13.53336513
-B,i:-6.11578472756
-B,in:-3.14e+100
-B,j:-5.05761912847
-B,jn:-3.14e+100
-B,k:-3.14e+100
-B,l:-4.90588358466
-B,ln:-3.14e+100
-B,m:-3.6524299819
-B,mg:-3.14e+100
-B,mq:-6.7869530014
-B,n:-1.69662577975
-B,ng:-3.14e+100
-B,nr:-2.23104959138
-B,nrfg:-5.87372217541
-B,nrt:-4.98564273352
-B,ns:-2.8228438315
-B,nt:-4.84609166818
-B,nz:-3.94698846058
-B,o:-8.43349870215
-B,p:-4.20098413209
-B,q:-6.99812385896
-B,qe:-3.14e+100
-B,qg:-3.14e+100
-B,r:-3.40981877908
-B,rg:-3.14e+100
-B,rr:-12.4347528413
-B,rz:-7.94611647157
-B,s:-5.52267359084
-B,t:-3.36474790945
-B,tg:-3.14e+100
-B,u:-9.1639172775
-B,ud:-3.14e+100
-B,ug:-3.14e+100
-B,uj:-3.14e+100
-B,ul:-3.14e+100
-B,uv:-3.14e+100
-B,uz:-3.14e+100
-B,v:-2.67405848743
-B,vd:-9.04472876024
-B,vg:-3.14e+100
-B,vi:-12.4347528413
-B,vn:-4.33156108902
-B,vq:-12.1470707689
-B,w:-3.14e+100
-B,x:-3.14e+100
-B,y:-9.84448567586
-B,yg:-3.14e+100
-B,z:-7.04568111149
-B,zg:-3.14e+100
-E,a:-3.14e+100
-E,ad:-3.14e+100
-E,ag:-3.14e+100
-E,an:-3.14e+100
-E,b:-3.14e+100
-E,bg:-3.14e+100
-E,c:-3.14e+100
-E,d:-3.14e+100
-E,df:-3.14e+100
-E,dg:-3.14e+100
-E,e:-3.14e+100
-E,en:-3.14e+100
-E,f:-3.14e+100
-E,g:-3.14e+100
-E,h:-3.14e+100
-E,i:-3.14e+100
-E,in:-3.14e+100
-E,j:-3.14e+100
-E,jn:-3.14e+100
-E,k:-3.14e+100
-E,l:-3.14e+100
-E,ln:-3.14e+100
-E,m:-3.14e+100
-E,mg:-3.14e+100
-E,mq:-3.14e+100
-E,n:-3.14e+100
-E,ng:-3.14e+100
-E,nr:-3.14e+100
-E,nrfg:-3.14e+100
-E,nrt:-3.14e+100
-E,ns:-3.14e+100
-E,nt:-3.14e+100
-E,nz:-3.14e+100
-E,o:-3.14e+100
-E,p:-3.14e+100
-E,q:-3.14e+100
-E,qe:-3.14e+100
-E,qg:-3.14e+100
-E,r:-3.14e+100
-E,rg:-3.14e+100
-E,rr:-3.14e+100
-E,rz:-3.14e+100
-E,s:-3.14e+100
-E,t:-3.14e+100
-E,tg:-3.14e+100
-E,u:-3.14e+100
-E,ud:-3.14e+100
-E,ug:-3.14e+100
-E,uj:-3.14e+100
-E,ul:-3.14e+100
-E,uv:-3.14e+100
-E,uz:-3.14e+100
-E,v:-3.14e+100
-E,vd:-3.14e+100
-E,vg:-3.14e+100
-E,vi:-3.14e+100
-E,vn:-3.14e+100
-E,vq:-3.14e+100
-E,w:-3.14e+100
-E,x:-3.14e+100
-E,y:-3.14e+100
-E,yg:-3.14e+100
-E,z:-3.14e+100
-E,zg:-3.14e+100
-M,a:-3.14e+100
-M,ad:-3.14e+100
-M,ag:-3.14e+100
-M,an:-3.14e+100
-M,b:-3.14e+100
-M,bg:-3.14e+100
-M,c:-3.14e+100
-M,d:-3.14e+100
-M,df:-3.14e+100
-M,dg:-3.14e+100
-M,e:-3.14e+100
-M,en:-3.14e+100
-M,f:-3.14e+100
-M,g:-3.14e+100
-M,h:-3.14e+100
-M,i:-3.14e+100
-M,in:-3.14e+100
-M,j:-3.14e+100
-M,jn:-3.14e+100
-M,k:-3.14e+100
-M,l:-3.14e+100
-M,ln:-3.14e+100
-M,m:-3.14e+100
-M,mg:-3.14e+100
-M,mq:-3.14e+100
-M,n:-3.14e+100
-M,ng:-3.14e+100
-M,nr:-3.14e+100
-M,nrfg:-3.14e+100
-M,nrt:-3.14e+100
-M,ns:-3.14e+100
-M,nt:-3.14e+100
-M,nz:-3.14e+100
-M,o:-3.14e+100
-M,p:-3.14e+100
-M,q:-3.14e+100
-M,qe:-3.14e+100
-M,qg:-3.14e+100
-M,r:-3.14e+100
-M,rg:-3.14e+100
-M,rr:-3.14e+100
-M,rz:-3.14e+100
-M,s:-3.14e+100
-M,t:-3.14e+100
-M,tg:-3.14e+100
-M,u:-3.14e+100
-M,ud:-3.14e+100
-M,ug:-3.14e+100
-M,uj:-3.14e+100
-M,ul:-3.14e+100
-M,uv:-3.14e+100
-M,uz:-3.14e+100
-M,v:-3.14e+100
-M,vd:-3.14e+100
-M,vg:-3.14e+100
-M,vi:-3.14e+100
-M,vn:-3.14e+100
-M,vq:-3.14e+100
-M,w:-3.14e+100
-M,x:-3.14e+100
-M,y:-3.14e+100
-M,yg:-3.14e+100
-M,z:-3.14e+100
-M,zg:-3.14e+100
-S,a:-3.90253968313
-S,ad:-11.0484584802
-S,ag:-6.95411391796
-S,an:-12.8402179494
-S,b:-6.47288876397
-S,bg:-3.14e+100
-S,c:-4.78696679586
-S,d:-3.90391976418
-S,df:-3.14e+100
-S,dg:-8.9483976513
-S,e:-5.94251300628
-S,en:-3.14e+100
-S,f:-5.19482024998
-S,g:-6.50782681533
-S,h:-8.65056320738
-S,i:-3.14e+100
-S,in:-3.14e+100
-S,j:-4.91199211964
-S,jn:-3.14e+100
-S,k:-6.94032059583
-S,l:-3.14e+100
-S,ln:-3.14e+100
-S,m:-3.26920065212
-S,mg:-10.8253149289
-S,mq:-3.14e+100
-S,n:-3.85514838976
-S,ng:-4.9134348611
-S,nr:-4.48366310396
-S,nrfg:-3.14e+100
-S,nrt:-3.14e+100
-S,ns:-3.14e+100
-S,nt:-12.1470707689
-S,nz:-3.14e+100
-S,o:-8.46446092775
-S,p:-2.98684018136
-S,q:-4.88865861826
-S,qe:-3.14e+100
-S,qg:-3.14e+100
-S,r:-2.76353367841
-S,rg:-10.2752685919
-S,rr:-3.14e+100
-S,rz:-3.14e+100
-S,s:-3.14e+100
-S,t:-3.14e+100
-S,tg:-6.27284253188
-S,u:-6.94032059583
-S,ud:-7.72823016105
-S,ug:-7.53940370266
-S,uj:-6.85251045118
-S,ul:-8.41537131755
-S,uv:-8.15808672229
-S,uz:-9.29925862537
-S,v:-3.05329230341
-S,vd:-3.14e+100
-S,vg:-5.94301818437
-S,vi:-3.14e+100
-S,vn:-11.4539235883
-S,vq:-3.14e+100
-S,w:-3.14e+100
-S,x:-8.42741965607
-S,y:-6.19707946995
-S,yg:-13.53336513
-S,z:-3.14e+100
-S,zg:-3.14e+100
--- a/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
--- a/libchinese-segmentation/dict/stop_words.utf8
+++ b/libchinese-segmentation/dict/stop_words.utf8
--- a/libchinese-segmentation/dict/user.dict.utf8
+++ b/libchinese-segmentation/dict/user.dict.utf8
@ -1,4 +0,0 @@
-云计算
-韩玉鉴赏
-蓝翔 nz
-区块链 10 nz
--- a/libchinese-segmentation/hanzi-to-pinyin-private.h
+++ b/libchinese-segmentation/hanzi-to-pinyin-private.h
@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#ifndef HANZITOPINYINPRIVATE_H
-#define HANZITOPINYINPRIVATE_H
-
-#include <QtCore/qglobal.h>
-#include <QHash>
-#include "pinyin4cpp_dictTrie.h"
-#include "hanzi-to-pinyin.h"
-#include "pinyin4cpp-trie.h"
-
-using namespace std;
-
-static const QHash<QString, QString> PhoneticSymbol = { // tone-marked letter -> base letter followed by the tone digit
-    {"ā", "a1"}, {"á", "a2"}, {"ǎ", "a3"}, {"à", "a4"},
-    {"ē", "e1"}, {"é", "e2"}, {"ě", "e3"}, {"è", "e4"},
-    {"ō", "o1"}, {"ó", "o2"}, {"ǒ", "o3"}, {"ò", "o4"},
-    {"ī", "i1"}, {"í", "i2"}, {"ǐ", "i3"}, {"ì", "i4"},
-    {"ū", "u1"}, {"ú", "u2"}, {"ǔ", "u3"}, {"ù", "u4"},
-    // üe
-    {"ü", "v"}, // the only value without a tone digit (length 1)
-    {"ǖ", "v1"}, {"ǘ", "v2"}, {"ǚ", "v3"}, {"ǜ", "v4"},
-    {"ń", "n2"}, {"ň", "n3"}, {"ǹ", "n4"},
-    {"m̄", "m1"}, {"ḿ", "m2"}, {"m̀", "m4"}, // NOTE(review): some keys here look like letter + combining mark (two QChars) — confirm; a per-QChar lookup cannot match those
-    {"ê̄", "ê1"}, {"ế", "ê2"}, {"ê̌", "ê3"}, {"ề", "ê4"}
-};
-
-#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
-
-class PINYINMANAGER_EXPORT HanZiToPinYinPrivate // implementation object behind HanZiToPinYin
-{
-public:
-    HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr); // parent is stored in q
-    ~HanZiToPinYinPrivate();
-
-public:
-    template <typename T>
-    bool isMultiTone(T &&t) {return m_pinYinTrie.IsMultiTone(std::forward<T>(t));} // perfect-forwards to Pinyin4cppTrie::IsMultiTone
-
-    bool contains(string &word); // true when the trie contains word
-    int getResults(string &word, QStringList &results); // fills results with the pinyin of word; see the definition for the return value
-    void setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType); // stores the four options in the members below
-
-private:
-    void convertDataStyle(QStringList &results); // rewrites results in place according to m_pinyinDataStyle
-
-    HanZiToPinYin *q = nullptr; // owning public object, set by the constructor
-    //Pinyin4cppDictTrie *m_pinYinTrie = nullptr;
-    Pinyin4cppTrie m_pinYinTrie; // value member: constructed and destroyed with this object
-
-    SegType m_segType = SegType::Segmentation; // segmentation is on by default
-    PolyphoneType m_polyphoneType = PolyphoneType::Disable; // polyphones are off by default
-    PinyinDataStyle m_pinyinDataStyle = PinyinDataStyle::Default;
-    ExDataProcessType m_exDataProcessType = ExDataProcessType::Default;
-};
-#endif // HANZITOPINYINPRIVATE_H
--- a/libchinese-segmentation/hanzi-to-pinyin.cpp
+++ b/libchinese-segmentation/hanzi-to-pinyin.cpp
@ -1,360 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#include <mutex>
-#include <cctype>
-#include "hanzi-to-pinyin.h"
-#include "hanzi-to-pinyin-private.h"
-#include "chinese-segmentation.h"
-#include "cppjieba/Unicode.hpp"
-
-HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr; // singleton instance, assigned inside getInstance()
-std::once_flag g_singleFlag; // passed to call_once in getInstance() so the instance is created once
-
-bool HanZiToPinYinPrivate::contains(string &word) // forwards to Pinyin4cppTrie::Contains
-{
-    return m_pinYinTrie.Contains(word);
-}
-
-/**
- * @brief Looks up the pinyin of @p word and stores the entries in @p results.
- *
- * The whole input is looked up first. When it is not in the pinyin trie and
- * segmentation is enabled, the input is cut with
- * ChineseSegmentation::callMixSegmentCutStr() and every segment is looked up
- * on its own; a multi-character segment that is still unknown is resolved
- * character by character. Text without pinyin data is kept only when
- * m_exDataProcessType is Default. The collected entries are finally passed
- * through convertDataStyle().
- *
- * Fixes over the previous version: an unknown character inside an unknown
- * multi-character segment no longer adds an extra empty entry after the
- * original text, and polyphonic-word readings with differing numbers of
- * syllables no longer index out of range.
- *
- * @param word    text to convert
- * @param results cleared first, then filled with the pinyin entries
- * @return -1 when the input is unknown and segmentation is disabled, otherwise 0
- */
-int HanZiToPinYinPrivate::getResults(string &word, QStringList &results)
-{
-    results.clear();
-
-    // Single character: readings are comma separated. Keep all of them when
-    // polyphones are enabled, otherwise only the first one.
-    auto appendCharPinyin = [this, &results](const string &pinyin) {
-        if (m_polyphoneType == PolyphoneType::Enable) {
-            results.append(QString::fromStdString(pinyin));
-        } else if (m_polyphoneType == PolyphoneType::Disable) {
-            // substr(0, npos) yields the whole string when there is no comma
-            results.append(QString::fromStdString(pinyin.substr(0, pinyin.find(','))));
-        }
-    };
-
-    // Multi-character word: readings are separated by '/', syllables by ','.
-    auto appendWordPinyin = [this, &results](const string &pinyin) {
-        const vector<string> readings = limonp::Split(pinyin, "/");
-        if (readings.size() < 2) {//no polyphonic word
-            for (const string &syllable : limonp::Split(pinyin, ",")) {
-                results.append(QString::fromStdString(syllable));
-            }
-        } else if (m_polyphoneType == PolyphoneType::Enable) {
-            // Split every reading once, then emit one "a/b/..." entry per
-            // syllable position of the first reading.
-            vector<vector<string>> syllables;
-            syllables.reserve(readings.size());
-            for (const string &reading : readings) {
-                syllables.push_back(limonp::Split(reading, ","));
-            }
-            for (size_t i = 0; i < syllables.front().size(); ++i) {
-                QStringList oneResult;
-                for (const vector<string> &reading : syllables) {
-                    if (i < reading.size()) {//guard readings with fewer syllables
-                        oneResult.append(QString::fromStdString(reading[i]));
-                    }
-                }
-                results.append(oneResult.join('/'));
-            }
-        } else if (m_polyphoneType == PolyphoneType::Disable) {
-            for (const string &syllable : limonp::Split(readings.front(), ",")) {
-                results.append(QString::fromStdString(syllable));
-            }
-        }
-    };
-
-    // Text without pinyin data: returned unchanged in Default mode, dropped otherwise.
-    auto appendUnknown = [this, &results](const string &text) {
-        if (m_exDataProcessType == ExDataProcessType::Default) {
-            results.append(QString::fromStdString(text));
-        }
-    };
-
-    // Fast path: the whole input is a known character or word.
-    string directResult = m_pinYinTrie.Find(word);
-    if (!directResult.empty()) {
-        if (cppjieba::IsSingleWord(word)) {
-            appendCharPinyin(directResult);
-        } else {
-            appendWordPinyin(directResult);
-        }
-        convertDataStyle(results);
-        return 0;
-    }
-
-    if (m_segType == SegType::NoSegmentation) {//no segmentation and no result
-        return -1;
-    }
-
-    // Slow path: segment the input and convert segment by segment.
-    vector<string> segResults = ChineseSegmentation::getInstance()->callMixSegmentCutStr(word);
-    for (string &info : segResults) {
-        if (info.empty()) {
-            continue;
-        }
-        string data = m_pinYinTrie.Find(info);
-        if (!data.empty()) {//the segment is in the trie
-            if (cppjieba::IsSingleWord(info)) {
-                appendCharPinyin(data);
-            } else {
-                appendWordPinyin(data);
-            }
-            continue;
-        }
-        if (cppjieba::IsSingleWord(info)) {//unknown single character
-            appendUnknown(info);
-            continue;
-        }
-
-        // Unknown multi-character segment: fall back to per-character lookup.
-        cppjieba::RuneStrArray runeArray;
-        cppjieba::DecodeRunesInString(info, runeArray);
-        for (auto i = runeArray.begin(); i != runeArray.end(); ++i) {
-            string oneWord = cppjieba::GetStringFromRunes(info, i, i);
-            data = m_pinYinTrie.Find(oneWord);
-            if (data.empty()) {
-                // Always skip the pinyin step here; falling through used to
-                // append the empty lookup result as an extra entry.
-                appendUnknown(oneWord);
-                continue;
-            }
-            appendCharPinyin(data);
-        }
-    }
-
-    convertDataStyle(results);
-    return 0;// NOTE: 0 is returned after segmentation even when results stays empty
-}
-
-void HanZiToPinYinPrivate::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType) // stores all four options; nothing is validated
-{
-    m_pinyinDataStyle = dataStyle;
-    m_segType = segType;
-    m_polyphoneType = polyphoneType;
-    m_exDataProcessType = processType;
-}
-
-// Returns the PhoneticSymbol entry for c, or an empty string when c is a plain
-// ASCII letter or has no entry. The test uses the code point, not isalpha() on
-// toLatin1(), which is locale dependent and undefined for negative char values.
-static QString phoneticValue(const QChar &c)
-{
-    const auto code = c.unicode();
-    if ((code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z')) {
-        return QString();
-    }
-    return PhoneticSymbol.value(QString(c));
-}
-
-// Replaces every tone-marked letter of text by its base letter, in place.
-static void stripToneMarks(QString &text)
-{
-    for (int i = 0; i < text.size(); ++i) {
-        const QString value = phoneticValue(text.at(i));
-        if (!value.isEmpty()) {
-            text.replace(i, 1, value.at(0));
-        }
-    }
-}
-
-/**
- * @brief Converts every entry of @p results to the configured PinyinDataStyle.
- *
- * Default strips tone marks and removes duplicate readings, Tone leaves the
- * data untouched, Tone2 puts the tone digit right after the marked letter,
- * Tone3 puts it at the end of the syllable, FirstLetter keeps only the first
- * letter of each reading. English is not supported and leaves data untouched.
- *
- * Fixes over the previous version: letters are classified without isalpha(),
- * Tone3 no longer reads past the one-character value of "ü", handles more
- * than two '/'-separated readings, and always restores the '/' separator.
- *
- * @param results entries collected by getResults(); modified in place
- */
-void HanZiToPinYinPrivate::convertDataStyle(QStringList &results)
-{
-    switch (m_pinyinDataStyle) {
-    case PinyinDataStyle::Default:
-        for (QString &info : results) {
-            if (info == ",") {
-                continue;
-            }
-            stripToneMarks(info);
-            //remove duplicates, keeping the original order
-            const QStringList readings = info.split(',', QString::SkipEmptyParts);
-            QStringList unique;
-            for (const QString &reading : readings) {
-                if (!unique.contains(reading)) {
-                    unique.push_back(reading);
-                }
-            }
-            info = unique.join(",");
-        }
-        break;
-    case PinyinDataStyle::Tone2:
-        for (QString &info : results) {
-            QString converted;
-            converted.reserve(info.size() * 2);
-            for (int i = 0; i < info.size(); ++i) {
-                const QString value = phoneticValue(info.at(i));
-                if (value.isEmpty()) {
-                    converted.append(info.at(i));
-                } else {
-                    converted.append(value);
-                }
-            }
-            info = converted;
-        }
-        break;
-    case PinyinDataStyle::Tone3:
-        for (QString &info : results) {
-            if (info == "/") {
-                continue;
-            }
-            //polyphonic words use '/' between readings, single characters ','
-            const QChar separator = info.contains("/") ? QLatin1Char('/') : QLatin1Char(',');
-            QStringList syllables = info.replace("/", ",").split(',');
-            for (QString &syllable : syllables) {
-                QChar tone;
-                for (int i = 0; i < syllable.size(); ++i) {
-                    const QString value = phoneticValue(syllable.at(i));
-                    if (value.isEmpty()) {
-                        continue;
-                    }
-                    syllable.replace(i, 1, value.at(0));
-                    //"ü" maps to "v" only: there is no tone digit to move
-                    if (value.size() > 1 && tone.isNull()) {
-                        tone = value.at(1);
-                    }
-                }
-                if (!tone.isNull()) {
-                    syllable.append(tone);
-                }
-            }
-            info = syllables.join(separator);
-        }
-        break;
-    case PinyinDataStyle::FirstLetter:
-        for (QString &info : results) {
-            if (info == "," or info == "/") {
-                continue;
-            }
-            const bool isPolyphoneWords = info.contains("/");
-            info.replace("/", ",");
-            stripToneMarks(info);
-
-            const QStringList readings = info.split(',', QString::SkipEmptyParts);
-            QStringList letters;
-            for (const QString &reading : readings) {
-                //NOTE(review): compares the whole reading with the letters collected so far (unchanged behaviour) — confirm intent
-                if (!letters.contains(reading)) {
-                    letters.push_back(reading.at(0));
-                }
-            }
-            info = letters.join(isPolyphoneWords ? "/" : ",");
-        }
-        break;
-    case PinyinDataStyle::Tone:     //nothing to do
-    case PinyinDataStyle::English:  //not supported yet
-        break;
-    }
-}
-
-HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent) // only stores the owner; the body holds disabled code for the former pointer-based trie
-{
-    //const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt";
-    //const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt";
-    //m_pinYinTrie = new Pinyin4cppDictTrie(SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH);
-    //m_pinYinTrie = new Pinyin4cppTrie;
-}
-
-HanZiToPinYinPrivate::~HanZiToPinYinPrivate() // nothing to release: m_pinYinTrie is a value member; the disabled code belongs to the old pointer member
-{
-//    if (m_pinYinTrie){
-//        delete m_pinYinTrie;
-//        m_pinYinTrie = nullptr;
-//    }
-}
-
-HanZiToPinYin * HanZiToPinYin::getInstance() // returns the process-wide instance, creating it on the first call
-{
-    call_once(g_singleFlag, []() { // call_once runs the lambda a single time, also with concurrent callers
-        g_pinYinManager = new HanZiToPinYin; // no matching delete appears in this file
-    });
-    return g_pinYinManager;
-}
-
-bool HanZiToPinYin::contains(string &word) // public entry point; delegates to the private implementation
-{
-    return d->contains(word);
-}
-
-bool HanZiToPinYin::isMultiTone(string &word) // lvalue overload; delegates to the private template
-{
-    return d->isMultiTone(word);
-}
-
-bool HanZiToPinYin::isMultiTone(string &&word) // rvalue overload; word is a named lvalue here, so it is forwarded as string&
-{
-    return d->isMultiTone(word);
-}
-
-bool HanZiToPinYin::isMultiTone(const string &word) // const lvalue overload; delegates to the private template
-{
-    return d->isMultiTone(word);
-}
-
-bool HanZiToPinYin::isMultiTone(const string &&word) // const rvalue overload; word is forwarded as const string&
-{
-    return d->isMultiTone(word);
-}
-
-int HanZiToPinYin::getResults(string word, QStringList &results) // word is taken by value, so the private call works on a copy
-{
-    return d->getResults(word, results);
-}
-
-void HanZiToPinYin::setConfig(PinyinDataStyle dataStyle, SegType segType, PolyphoneType polyphoneType, ExDataProcessType processType) // delegates to the private implementation
-{
-    d->setConfig(dataStyle, segType, polyphoneType, processType);
-}
-
-HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate) // d is allocated here; no matching delete or destructor definition appears in this file
-{
-}
--- a/libchinese-segmentation/hanzi-to-pinyin.h
+++ b/libchinese-segmentation/hanzi-to-pinyin.h
@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#ifndef HANZITOPINYIN_H
-#define HANZITOPINYIN_H
-
-#include <QtCore/qglobal.h>
-#include <QStringList>
-#include "pinyin4cpp-common.h"
-#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
-
-using namespace std;
-
-class HanZiToPinYinPrivate;
-class PINYINMANAGER_EXPORT HanZiToPinYin
-{
-public:
-    static HanZiToPinYin * getInstance();
-
-public:
-    /**
-     * @brief HanZiToPinYin::isMultiTone Checks whether a character/word/sentence is polyphonic
-     * @param word the character/word/sentence to check
-     * @return bool false if it is not polyphonic
-     */
-    bool isMultiTone(string &word);
-    bool isMultiTone(string &&word);
-    bool isMultiTone(const string &word);
-    bool isMultiTone(const string &&word);
-
-    /**
-     * @brief HanZiToPinYin::contains Checks whether a character/word/sentence has pinyin (i.e. is contained in the database)
-     * @param word the character/word/sentence to look up
-     * @return bool false if the database does not contain it
-     */
-    bool contains(string &word);
-
-    /**
-     * @brief HanZiToPinYin::getResults Gets the pinyin of a character/word/sentence
-     * @param word the character/word/sentence whose pinyin is requested
-     * @param results pinyin list of word (entries may be polyphonic); results is cleared on every call
-     * @return int 0 on success, -1 otherwise
-     */
-    int getResults(string word, QStringList &results);
-
-    /**
-     * @brief setConfig Configures the features of HanZiToPinYin, see pinyin4cpp-common.h for details
-     * @param dataStyle style of the returned data, Default by default
-     * @param segType whether segmentation is enabled, enabled by default
-     * @param polyphoneType whether polyphones are enabled, disabled by default
-     * @param processType how text without pinyin data is handled, Default by default
-     */
-    void setConfig(PinyinDataStyle dataStyle,SegType segType,PolyphoneType polyphoneType,ExDataProcessType processType);
-
-protected:
-    HanZiToPinYin();
-    ~HanZiToPinYin();
-    HanZiToPinYin(const HanZiToPinYin&) = delete;
-    HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
-private:
-    static HanZiToPinYin *g_pinYinManager;
-    HanZiToPinYinPrivate *d = nullptr;
-};
-
-#endif // HANZITOPINYIN_H
--- a/libchinese-segmentation/libchinese-segmentation.pro
+++ b/libchinese-segmentation/libchinese-segmentation.pro
@ -1,84 +0,0 @@
-QT -= gui
-
-VERSION = 1.1.0
-TARGET =  chinese-segmentation
-TEMPLATE = lib
-DEFINES += LIBCHINESESEGMENTATION_LIBRARY
-DEFINES += VERSION='\\"$${VERSION}\\"'
-
-CONFIG += c++11 create_pc create_prl no_install_prl
-
-# The following define makes your compiler emit warnings if you use
-# any Qt feature that has been marked deprecated (the exact warnings
-# depend on your compiler). Please consult the documentation of the
-# deprecated API in order to know how to port your code away from it.
-DEFINES += QT_DEPRECATED_WARNINGS
-QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
-#QMAKE_CXXFLAGS += -Werror=uninitialized
-QMAKE_CXXFLAGS += -execution-charset:utf-8
-
-# You can also make your code fail to compile if it uses deprecated APIs.
-# In order to do so, uncomment the following line.
-# You can also select to disable deprecated APIs only up to a certain version of Qt.
-#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
-include(cppjieba/cppjieba.pri)
-include(pinyin4cpp/pinyin4cpp.pri)
-include(Traditional-Chinese-Simplified-conversion/Traditional2Simplified.pri)
-include(storage-base/storage-base-cedar.pri)
-
-#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
-
-SOURCES += \
-    chinese-segmentation.cpp \
-    hanzi-to-pinyin.cpp \
-    Traditional-to-Simplified.cpp
-
-HEADERS += \
-    chinese-segmentation-private.h \
-    chinese-segmentation.h \
-    common-struct.h \
-    hanzi-to-pinyin-private.h \
-    hanzi-to-pinyin.h \
-    Traditional-to-Simplified-private.h \
-    Traditional-to-Simplified.h \
-    pinyin4cpp-common.h \
-    libchinese-segmentation_global.h
-
-dict_files.path = /usr/share/ukui-search/res/dict/    # one assignment per line: a trailing '\' would fold the next "dict_files.files +=" tokens into the file list
-dict_files.files = $$PWD/dict/*.utf8
-dict_files.files += $$PWD/dict/pos_dict/*.utf8
-dict_files.files += $$PWD/dict/*.txt
-dict_files.files += $$PWD/pinyin4cpp/dict/*.txt
-dict_files.files += $$PWD/Traditional-Chinese-Simplified-conversion/dict/*.txt
-
-INSTALLS += \
-    dict_files \
-
-# Default rules for deployment.
-unix {
-    target.path = $$[QT_INSTALL_LIBS]
-    QMAKE_PKGCONFIG_NAME = chinese-segmentation
-    QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
-    QMAKE_PKGCONFIG_VERSION = $$VERSION
-    QMAKE_PKGCONFIG_LIBDIR = $$target.path
-    QMAKE_PKGCONFIG_DESTDIR = pkgconfig
-    QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
-    QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
-
-!isEmpty(target.path): INSTALLS += target
-
-    header.path = /usr/include/chinese-seg
-    header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h pinyin4cpp-common.h Traditional-to-Simplified.h
-    header.files += development-files/header-files/*
-#    headercppjieba.path = /usr/include/chinese-seg/cppjieba/
-#    headercppjieba.files = cppjieba/*
-    INSTALLS += header
-}
-
-
-#DISTFILES += \
-#    jiaba/jieba.pri
-
-DISTFILES += \
-    development-files/header-files/* \
-    pinyin4cpp/pinyin4cpp.pri
--- a/libchinese-segmentation/libchinese-segmentation_global.h
+++ b/libchinese-segmentation/libchinese-segmentation_global.h
@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2020, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: zhangzihao <zhangzihao@kylinos.cn>
- * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
- *
- */
-#ifndef CHINESESEGMENTATION_GLOBAL_H
-#define CHINESESEGMENTATION_GLOBAL_H
-
-#include <QtCore/qglobal.h>
-
-#if defined(CHINESESEGMENTATION_LIBRARY) || defined(LIBCHINESESEGMENTATION_LIBRARY) // the library's .pro defines the LIB-prefixed name; the old name is still accepted
-#  define CHINESESEGMENTATION_EXPORT Q_DECL_EXPORT
-#else
-#  define CHINESESEGMENTATION_EXPORT Q_DECL_IMPORT
-#endif
-
-#endif // CHINESESEGMENTATION_GLOBAL_H
--- a/libchinese-segmentation/pinyin4cpp-common.h
+++ b/libchinese-segmentation/pinyin4cpp-common.h
@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#ifndef PINYIN4CPP_COMMON_H
-#define PINYIN4CPP_COMMON_H
-
-/**
- * @brief The PinyinDataStyle enum
- * Default 默认模式，“中心” return “zhong xin”
- * Tone 带读音模式 #“中心” return “zhōng xīn”
- * Tone2 带读音模式2 #“中心” return “zho1ng xi1n”
- * Tone3 带读音模式3 #“中心” return “zhong1 xin1”
- * FirstLetter 首字母模式 #“中心” return “z x”
- * English 英文翻译模式(暂不支持) #“中心” return “center,heart,core”
- */
-enum class PinyinDataStyle {
-    Default       = 1u << 0,
-    Tone          = 1u << 1,
-    Tone2         = 1u << 2,
-    Tone3         = 1u << 3,
-    FirstLetter   = 1u << 4,
-    English       = 1u << 5
-};
-
-/**
- * @brief The SegType enum
- * Segmentation 默认带分词 #“银河麒麟”->“银河”“麒麟”
- * NoSegmentation 无分词模式 #“银河麒麟”
- */
-enum class SegType {
-    Segmentation    = 1u << 0,
-    NoSegmentation  = 1u << 1
-};
-
-/**
- * @brief The PolyphoneType enum
- * Disable 默认不启用多音字，“奇安信”return “qi an xin”多音字按照常用读音返回
- * Enable 启用多音字 “奇安信” return“qi,ji an xin”
- * 注意：多音词返回格式为 “朝阳” return "zhao/chao yang/yang"
- */
-enum class PolyphoneType {
-    Disable       = 1u << 0,
-    Enable        = 1u << 1
-};
-
-/**
- * @brief The ExDataProcessType enum
- * Default 默认无拼音数据直接返回，“123木头人” return "123 mu tou ren"（分词模式）
- * Delete  删除多余数据，#“123木头人” return "mu tou ren"（分词模式）
- */
-enum class ExDataProcessType {
-    Default       = 1u << 0,
-    Delete        = 1u << 1
-};
-
-#endif //PINYIN4CPP_COMMON_H
--- a/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
+++ b/libchinese-segmentation/pinyin4cpp/dict/wordsPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.cpp
@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#include "pinyin4cpp-trie.h"
-
-Pinyin4cppTrie::Pinyin4cppTrie(string dat_cache_path)
-    : StorageBase<char, false, CacheFileHeaderBase>(vector<string>{SINGLE_WORD_PINYIN_PATH, WORDS_PINYIN_PATH}, dat_cache_path)
-{
-    this->Init();
-}
-
-Pinyin4cppTrie::Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path)
-    : StorageBase<char, false, CacheFileHeaderBase>(file_paths, dat_cache_path)
-{
-    this->Init();
-}
-
-
-
-bool Pinyin4cppTrie::Contains(string &word) {
-    if (this->Find(word) != string())
-        return true;
-    return false;
-}
-
-bool Pinyin4cppTrie::IsMultiTone(const string &word) {
-    string result = this->Find(word);
-    if (result.find(",") == result.npos)
-        return true;
-    return false;
-}
-
-void Pinyin4cppTrie::LoadSourceFile(const string &dat_cache_file, const string &md5)
-{
-    CacheFileHeaderBase header;
-    assert(sizeof(header.md5_hex) == md5.size());
-    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-    int offset(0), elements_num(0), write_bytes(0), data_trie_size(0);
-    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-    umask(S_IWGRP | S_IWOTH);
-    const int fd =mkstemp((char *)tmp_filepath.data());
-    assert(fd >= 0);
-    fchmod(fd, 0644);
-
-    write_bytes = write(fd, (const char *)&header, sizeof(CacheFileHeaderBase));
-
-    this->LoadSingleWordDict(fd, write_bytes, offset, elements_num);
-    this->LoadWordsDict(fd, write_bytes, offset, elements_num);
-
-    write_bytes += write(fd, this->GetDataTrieArray(), this->GetDataTrieTotalSize());
-
-    lseek(fd, sizeof(header.md5_hex), SEEK_SET);
-    write(fd, &elements_num, sizeof(int));
-    write(fd, &offset, sizeof(int));
-    data_trie_size = this->GetDataTrieSize();
-    write(fd, &data_trie_size, sizeof(int));
-
-    close(fd);
-    assert((size_t)write_bytes == sizeof(CacheFileHeaderBase) + offset + this->GetDataTrieTotalSize());
-
-    tryRename(tmp_filepath, dat_cache_file);
-}
-
-string Pinyin4cppTrie::Find(const string &key)
-{
-    int result = this->ExactMatchSearch(key.c_str(), key.size());
-    if (result < 0)
-        return string();
-    return string(&this->GetElementPtr()[result]);
-}
-
-void Pinyin4cppTrie::LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
-{
-    ifstream ifs(SINGLE_WORD_PINYIN_PATH);
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, ":");
-        if (buf.size() != 3)
-            continue;
-        this->Update(buf[2].c_str(), buf[2].size(), offset);
-        offset += (buf[1].size() + 1);
-        elements_num++;
-        write_bytes += write(fd, buf[1].c_str(), buf[1].size() + 1);
-    }
-}
-
-void Pinyin4cppTrie::LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num)
-{
-    ifstream ifs(WORDS_PINYIN_PATH);
-    string line;
-    vector<string> buf;
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#") or line.empty()) {
-            continue;
-        }
-        limonp::Split(line, buf, ":");
-        if (buf.size() != 2)
-            continue;
-        this->Update(buf[0].c_str(), buf[0].size(), offset);
-        offset += (buf[1].size() + 1);
-        elements_num++;
-        write_bytes += write(fd, buf[1].c_str(), buf[1].size() + 1);
-    }
-}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp-trie.h
@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#ifndef PINYIN4CPPTRIE_H
-#define PINYIN4CPPTRIE_H
-
-#include "storage-base.hpp"
-
-const char * const  SINGLE_WORD_PINYIN_PATH = "/usr/share/ukui-search/res/dict/singleWordPinyin.txt";
-const char * const  WORDS_PINYIN_PATH = "/usr/share/ukui-search/res/dict/wordsPinyin.txt";
-
-class Pinyin4cppTrie : public StorageBase<char, false, CacheFileHeaderBase>
-{
-public:
-    Pinyin4cppTrie(string dat_cache_path = "");
-    Pinyin4cppTrie(const vector<string> file_paths, string dat_cache_path = "");
-    void LoadSourceFile(const string &dat_cache_file, const string &md5) override;
-    string Find(const string &key);
-    bool Contains(string &word);
-    bool IsMultiTone(const string &word);
-
-private:
-    void LoadSingleWordDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
-    void LoadWordsDict(const int &fd, int &write_bytes, int &offset, int &elements_num);
-};
-
-#endif // PINYIN4CPPTRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp.pri
@ -1,15 +0,0 @@
-INCLUDEPATH += $$PWD
-
-HEADERS += \
-    $$PWD/pinyin4cpp-trie.h \
-    $$PWD/pinyin4cpp_dataTrie.h \
-    $$PWD/pinyin4cpp_dictTrie.h
-
-SOURCES += \
-    $$PWD/pinyin4cpp-trie.cpp \
-    $$PWD/pinyin4cpp_dataTrie.cpp \
-    $$PWD/pinyin4cpp_dictTrie.cpp
-
-DISTFILES += \
-    pinyin4cpp/dict/wordsPinyin.txt \
-    pinyin4cpp/dict/singleWordPinyin.txt
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.cpp
@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#include "pinyin4cpp_dataTrie.h"
-
-Pinyin4cppDataTrie::Pinyin4cppDataTrie()
-{
-
-}
-
-Pinyin4cppDataTrie::~Pinyin4cppDataTrie()
-{
-    munmap(m_mmapAddr, m_mmapLength);
-    m_mmapAddr = nullptr;
-    close(m_mmapFd);
-    m_mmapFd = -1;
-}
-
-string Pinyin4cppDataTrie::Find(const string &key) const {
-//    darts-clone的接口方法
-    Darts::DoubleArray::result_pair_type find_result;
-    m_DoubleArrayDataTrie.exactMatchSearch(key.c_str(), find_result);
-    if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= m_elementsSize)) {//todo
-        return string();
-    }
-    return string(&m_elementsPtr[find_result.value]);
-
-//  cedarpp的接口方法
-//    int result = m_DoubleArrayDataTrie.exactMatchSearch<int>(key.c_str(), key.size());
-//    if (result < 0)
-//        return string();
-//    return string(&m_elementsPtr[result]);
-
-}
-
-bool Pinyin4cppDataTrie::InitBuildDat(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
-    BuildDatCache(elements, dat_cache_file, md5);
-    return InitAttachDat(dat_cache_file, md5);
-}
-
-bool Pinyin4cppDataTrie::InitAttachDat(const string &dat_cache_file, const string &md5) {
-    m_mmapFd = open(dat_cache_file.c_str(), O_RDONLY);
-
-    if (m_mmapFd < 0) {
-        return false;
-    }
-
-    const auto seek_off = lseek(m_mmapFd, 0, SEEK_END);
-    assert(seek_off >= 0);
-
-    m_mmapLength = static_cast<size_t>(seek_off);
-    m_mmapAddr = reinterpret_cast<char *>(mmap(NULL, m_mmapLength, PROT_READ, MAP_SHARED, m_mmapFd, 0));
-    assert(MAP_FAILED != m_mmapAddr);
-    assert(m_mmapLength >= sizeof(CacheFileHeader));
-
-    CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(m_mmapAddr);
-    m_elementsNum = header.elements_num;
-    m_elementsSize = header.elements_size;
-    assert(sizeof(header.md5_hex) == md5.size());
-
-    if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
-        return false;
-    }
-
-    assert(m_mmapLength == sizeof(CacheFileHeader) + header.elements_size  + header.dat_size * m_DoubleArrayDataTrie.unit_size());
-
-    m_elementsPtr = (const char *)(m_mmapAddr + sizeof(CacheFileHeader));
-    const char * dat_ptr = m_mmapAddr + sizeof(CacheFileHeader) + header.elements_size;
-    m_DoubleArrayDataTrie.set_array((char *)dat_ptr, header.dat_size);
-    return true;
-}
-
-void Pinyin4cppDataTrie::BuildDatCache(map<string, string> &elements, const string &dat_cache_file, const string &md5) {
-    vector<const char*> keys_ptr_vec;
-    vector<int> values_vec;
-    vector<string> mem_elem_vec;
-
-    keys_ptr_vec.reserve(elements.size());
-    values_vec.reserve(elements.size());
-    mem_elem_vec.reserve(elements.size());
-
-    CacheFileHeader header;
-    assert(sizeof(header.md5_hex) == md5.size());
-    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
-
-    int offset(0);
-    for (auto &info:elements) {
-        keys_ptr_vec.push_back(info.first.c_str());
-        values_vec.push_back(offset);
-        offset += (info.second.size() + 1);//+1指字符串后加\0
-        assert(info.second.size() > 0);
-        mem_elem_vec.push_back(info.second);
-    }
-
-    auto const ret = m_DoubleArrayDataTrie.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
-    assert(0 == ret);
-    header.elements_num = mem_elem_vec.size();
-    header.elements_size = offset;
-    header.dat_size = m_DoubleArrayDataTrie.size();
-
-    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
-    umask(S_IWGRP | S_IWOTH);
-    const int fd =mkstemp((char *)tmp_filepath.data());
-    assert(fd >= 0);
-    fchmod(fd, 0644);
-
-    auto write_bytes = write(fd, (const char *)&header, sizeof(header));
-    for (size_t i = 0; i < elements.size(); ++i) {
-        write_bytes += write(fd, mem_elem_vec[i].c_str(), mem_elem_vec[i].size() + 1);
-    }
-    write_bytes += write(fd, m_DoubleArrayDataTrie.array(), m_DoubleArrayDataTrie.total_size());
-
-    assert((size_t)write_bytes == sizeof(header) + offset + m_DoubleArrayDataTrie.total_size());
-    close(fd);
-
-    const auto rename_ret = rename(tmp_filepath.c_str(), dat_cache_file.c_str());
-    assert(0 == rename_ret);
-}
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dataTrie.h
@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-#ifndef PINYIN4cpp_DATATRIE_H
-#define PINYIN4cpp_DATATRIE_H
-
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <QDebug>
-#include "Md5.hpp"
-#include "LocalVector.hpp"
-#include "StringUtil.hpp"
-//#define USE_REDUCED_TRIE
-#include "../storage-base/cedar/cedar.h"
-#include "../storage-base/darts-clone/darts.h"
-
-using namespace std;
-using std::pair;
-
-struct CacheFileHeader { //todo 字节对齐
-    char md5_hex[32] = {};
-    uint32_t elements_num = 0;
-    uint32_t elements_size = 0;
-    uint32_t dat_size = 0;
-};
-
-class Pinyin4cppDataTrie {
-public:
-    Pinyin4cppDataTrie();
-    ~Pinyin4cppDataTrie();
-
-    string Find(const string & key) const;
-
-    bool InitBuildDat(map<string, string>& elements, const string & dat_cache_file, const string & md5);
-
-    bool InitAttachDat(const string & dat_cache_file, const string & md5);
-
-private:
-    void BuildDatCache(map<string, string>& elements, const string & dat_cache_file, const string & md5);
-
-    Pinyin4cppDataTrie(const Pinyin4cppDataTrie &);
-    Pinyin4cppDataTrie &operator=(const Pinyin4cppDataTrie &);
-
-private:
-    Darts::DoubleArray m_DoubleArrayDataTrie;
-    //cedar::da<int, -1, -2, true> m_DoubleArrayDataTrie;
-    const char * m_elementsPtr = nullptr;
-    size_t m_elementsNum = 0;
-    size_t m_elementsSize = 0;
-    size_t m_mmapLength = 0;
-
-    int    m_mmapFd = -1;
-    char * m_mmapAddr = nullptr;
-};
-
-#endif //PINYIN4cpp_DATATRIE_H
--- a/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
+++ b/libchinese-segmentation/pinyin4cpp/pinyin4cpp_dictTrie.cpp
@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2022, KylinSoft Co., Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- *
- * Authors: jixiaoxu <jixiaoxu@kylinos.cn>
- *
- */
-
-#include "pinyin4cpp_dictTrie.h"
-#include "malloc.h"
-
-Pinyin4cppDictTrie::Pinyin4cppDictTrie(const string &single_word_dict_path, const string &words_dict_paths, const string &dat_cache_path) {
-    Init(single_word_dict_path, words_dict_paths, dat_cache_path);
-}
-
-string Pinyin4cppDictTrie::Find(const string &word) const {
-    return m_DataTrie.Find(word);
-}
-
-bool Pinyin4cppDictTrie::Contains(string &word) {
-    if (m_DataTrie.Find(word) != string())
-        return true;
-    return false;
-}
-
-bool Pinyin4cppDictTrie::IsMultiTone(const string &word) {
-    string result = m_DataTrie.Find(word);
-    if (result.find(",") == result.npos)
-        return true;
-    return false;
-}
-
-size_t Pinyin4cppDictTrie::GetTotalDictSize() const {
-    return m_TotalDictSize_;
-}
-
-void Pinyin4cppDictTrie::Init(const string &single_word_dict_path, const string &words_dict_paths, string dat_cache_path) {
-    const auto dict_list = single_word_dict_path + "|" + words_dict_paths;
-    size_t file_size_sum = 0;
-    const string md5 = CalcFileListMD5(dict_list, file_size_sum);
-    m_TotalDictSize_ = file_size_sum;
-
-    if (dat_cache_path.empty()) {
-        dat_cache_path = "/tmp/" + md5 + ".dat_cache";//未指定词库数据文件存储位置的默认存储在tmp目录下
-    }
-    qDebug() << "#####Pinyin Dict path:" << dat_cache_path.c_str();
-    if (m_DataTrie.InitAttachDat(dat_cache_path, md5)) {
-        return;
-    }
-
-    LoadSingleWordDict(single_word_dict_path);
-    LoadWordsDict(words_dict_paths);
-    bool build_ret = m_DataTrie.InitBuildDat(m_StaticNodeInfos, dat_cache_path, md5);
-    assert(build_ret);
-    m_StaticNodeInfos.clear();
-    malloc_trim(0);
-}
-
-void Pinyin4cppDictTrie::LoadSingleWordDict(const string &filePath) {
-    ifstream ifs(filePath.c_str());
-    string line;
-    vector<string> buf;
-
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#")) {
-            continue;
-        }
-        limonp::Split(line, buf, ":");
-        assert(buf.size() == SINGLE_WORD_DICT_COLUMN_NUM);
-        if (m_StaticNodeInfos.find(buf[2]) != m_StaticNodeInfos.end()) {
-            vector<string> tmp;
-            bool isfind(false);
-            limonp::Split(m_StaticNodeInfos[buf[2]], tmp, ",");
-            for (auto &onePinyin:tmp) {
-                if (onePinyin == buf[1]) {
-                    isfind = true;
-                    break;
-                }
-            }
-            if (!isfind) {
-                m_StaticNodeInfos[buf[2]] += ("," + buf[2]);
-            }
-        } else {
-            m_StaticNodeInfos[buf[2]] = buf[1];
-        }
-    }
-}
-
-void Pinyin4cppDictTrie::LoadWordsDict(const string &filePath) {
-    ifstream ifs(filePath.c_str());
-    string line;
-    vector<string> buf;
-    for (; getline(ifs, line);) {
-        if (limonp::StartsWith(line, "#")) {
-            continue;
-        }
-        limonp::Split(line, buf, ":");
-        assert(buf.size() == WORDS_DICT_COLUMN_NUM);
-        if (m_StaticNodeInfos.find(buf[0]) != m_StaticNodeInfos.end()) {
-            vector<string> tmp;
-            bool isfind(false);
-            limonp::Split(m_StaticNodeInfos[buf[0]], tmp, "/");
-            for (auto &onePinyin:tmp) {
-                if (onePinyin == buf[1]) {
-                    isfind = true;
-                    break;
-                }
-            }
-            if (!isfind) {
-                m_StaticNodeInfos[buf[0]] += ("/" + buf[1]);
-            }
-        } else {
-            m_StaticNodeInfos[buf[0]] = buf[1];
-        }
-    }
-}
-
-string CalcFileListMD5(const string &files_list, size_t &file_size_sum) {
-    limonp::MD5 md5;
-
-    const auto files = limonp::Split(files_list, "|;");
-    file_size_sum = 0;
-
-    for (auto const & local_path : files) {
-        const int fd = open(local_path.c_str(), O_RDONLY);
-        if (fd < 0){
-            continue;
-        }
-        auto const len = lseek(fd, 0, SEEK_END);
-        if (len > 0) {
-            void * addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
-            assert(MAP_FAILED != addr);
-
-            md5.Update((unsigned char *) addr, len);
-            file_size_sum += len;
-
-            munmap(addr, len);
-        }
-        close(fd);
-    }
-
-    md5.Final();
-    return string(md5.digestChars);
-}
--- a/Show More
+++ b/Show More
				`@ -1 +0,0 @@`
				`libsearch/.qm/*.qm usr/share/ukui-search/translations`
				`@ -0,0 +1 @@`
				`Subproject commit 4734827d7c31936f1485e4513316b05cb7c8714f`