Add chinese-segmentation submodule.

iaom 2022-06-20 14:12:31 +08:00
parent 1512502cea
commit 8d40e8a7b2
64 changed files with 4 additions and 670851 deletions

.gitmodules vendored

@@ -1,3 +1,6 @@
[submodule "debian"]
path = debian
url = http://gitlab2.kylin.com/kylin-desktop/ukui-search-debian.git
[submodule "libchinese-segmentation"]
path = libchinese-segmentation
url = http://gitlab2.kylin.com/iaom/chinese-segmentation.git

@@ -0,0 +1 @@
Subproject commit 02216728e0cf1f1304e97f7fc1f7b56f4ddc5872


@@ -1,33 +0,0 @@
#ifndef CHINESESEGMENTATIONPRIVATE_H
#define CHINESESEGMENTATIONPRIVATE_H
#include "chinese-segmentation.h"
#include "cppjieba/Jieba.hpp"
#include "cppjieba/KeywordExtractor.hpp"
class ChineseSegmentationPrivate
{
public:
explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
~ChineseSegmentationPrivate();
vector<KeyWord> callSegment(const string& sentence);
vector<string> callMixSegmentCutStr(const string& sentence);
vector<Word> callMixSegmentCutWord(const string& sentence);
string lookUpTagOfWord(const string& word);
vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
vector<Word> callFullSegment(const string& sentence);
vector<Word> callQuerySegment(const string& sentence);
vector<Word> callHMMSegment(const string& sentence);
vector<Word> callMPSegment(const string& sentence);
private:
cppjieba::Jieba *m_jieba;
ChineseSegmentation *q = nullptr;
};
#endif // CHINESESEGMENTATIONPRIVATE_H


@@ -1,162 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include "chinese-segmentation.h"
#include "chinese-segmentation-private.h"
ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
{
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
m_jieba = new cppjieba::Jieba(DICT_PATH,
HMM_PATH,
USER_DICT_PATH,
IDF_PATH,
STOP_WORD_PATH,
"");
}
ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
if(m_jieba)
delete m_jieba;
m_jieba = nullptr;
}
vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
const size_t topk = -1;
vector<KeyWord> keywordres;
ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence, keywordres, topk);
return keywordres;
}
vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
{
vector<string> keywordres;
ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
return keywordres;
}
string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
{
return ChineseSegmentationPrivate::m_jieba->LookupTag(word);
}
vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
{
vector<pair<string, string>> words;
ChineseSegmentationPrivate::m_jieba->Tag(sentence, words);
return words;
}
vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutAll(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutForSearch(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutHMM(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
{
size_t maxWordLen = 512;
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutSmall(sentence, keywordres, maxWordLen);
return keywordres;
}
ChineseSegmentation *ChineseSegmentation::getInstance()
{
static ChineseSegmentation *global_instance_chinese_segmentation = new ChineseSegmentation;
return global_instance_chinese_segmentation;
}
vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
{
return d->callSegment(sentence);
}
vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
{
return d->callMixSegmentCutStr(sentence);
}
vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
{
return d->callMixSegmentCutWord(str);
}
string ChineseSegmentation::lookUpTagOfWord(const string &word)
{
return d->lookUpTagOfWord(word);
}
vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
{
return d->getTagOfWordsInSentence(sentence);
}
vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
{
return d->callFullSegment(sentence);
}
vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
{
return d->callQuerySegment(sentence);
}
vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
{
return d->callHMMSegment(sentence);
}
vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
{
return d->callMPSegment(sentence);
}
ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
{
}


@@ -1,116 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef CHINESESEGMENTATION_H
#define CHINESESEGMENTATION_H
#include "libchinese-segmentation_global.h"
#include "common-struct.h"
class ChineseSegmentationPrivate;
class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
public:
static ChineseSegmentation *getInstance();
/**
* @brief ChineseSegmentation::callSegment
* Calls the extractor for keyword extraction: the sentence is first segmented with the Mix method, then keywords are extracted with the Idf dictionary.
*
* @param sentence
* @return vector<KeyWord>
*/
vector<KeyWord> callSegment(const string &sentence);
/**
* @brief ChineseSegmentation::callMixSegmentCutStr
* Segments with the Mix method: MP makes the initial cut, then HMM further segments the remainder.
*
* @param sentence
* @return vector<string>
*/
vector<string> callMixSegmentCutStr(const string& sentence);
/**
* @brief ChineseSegmentation::callMixSegmentCutWord
* Same functionality as callMixSegmentCutStr, but returns Word structures.
* @param sentence
* @return vector<Word>
*/
vector<Word> callMixSegmentCutWord(const string& str);
/**
* @brief ChineseSegmentation::lookUpTagOfWord
* Looks up the part-of-speech tag of word.
* @param word
* @return string the part-of-speech tag of word
*/
string lookUpTagOfWord(const string& word);
/**
* @brief ChineseSegmentation::getTagOfWordsInSentence
* Segments the sentence with the Mix method and returns the part-of-speech tag of each word.
* @param sentence
* @return vector<pair<string, string>> the word (first) and its part-of-speech tag (second)
*/
vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
/**
* @brief ChineseSegmentation::callFullSegment
* Segments with the Full method; Full cuts out every word found in the dictionary.
* @param sentence
* @return vector<Word>
*/
vector<Word> callFullSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callQuerySegment
* Segments with the Query method: Mix first, then Full on long words.
* @param sentence
* @return vector<Word>
*/
vector<Word> callQuerySegment(const string& sentence);
/**
* @brief ChineseSegmentation::callHMMSegment
* Segments with the HMM method.
* @param sentence
* @return vector<Word>
*/
vector<Word> callHMMSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callMPSegment
* Segments with the MP method.
* @param sentence
* @return vector<Word>
*/
vector<Word> callMPSegment(const string& sentence);
private:
explicit ChineseSegmentation();
~ChineseSegmentation() = default;
ChineseSegmentation(const ChineseSegmentation&) = delete;
ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
private:
ChineseSegmentationPrivate *d = nullptr;
};
#endif // CHINESESEGMENTATION_H
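
For reference, a minimal caller sketch of the interface documented above; it is not part of this diff and assumes the library is linked and the dictionary files referenced by ChineseSegmentationPrivate are installed:

#include <iostream>
#include <string>
#include "chinese-segmentation.h"

int main() {
    const std::string sentence = "...";  // any UTF-8 Chinese sentence

    // Keyword extraction: Mix segmentation followed by Idf weighting.
    for (const KeyWord &kw : ChineseSegmentation::getInstance()->callSegment(sentence)) {
        std::cout << kw.word << " weight=" << kw.weight << std::endl;
    }

    // Plain Mix segmentation returning strings.
    for (const std::string &w : ChineseSegmentation::getInstance()->callMixSegmentCutStr(sentence)) {
        std::cout << w << "/";
    }
    std::cout << std::endl;
    return 0;
}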


@@ -1,52 +0,0 @@
#ifndef COMMONSTRUCT_H
#define COMMONSTRUCT_H
#include <string>
#include <vector>
using namespace std;
/**
* @brief The KeyWord struct
*
* @property word the content of keyword
* @property offsets the Unicode offsets, can be used to check the word pos in a sentence
* @property weight the weight of the keyword
*/
struct KeyWord {
string word;
vector<size_t> offsets;
double weight;
~KeyWord() {
word = std::move("");
offsets.clear();
offsets.shrink_to_fit();
}
};
/**
* @brief The Word struct
*
* @property word the content of word
* @property offset the byte offset of the word (a Chinese character counts as 3, an English character as 1); can be used to locate the word in a sentence
* @property unicode_offset the Unicode offset of the word
* @property unicode_length the Unicode length of the word
*/
struct Word {
string word;
uint32_t offset;
uint32_t unicode_offset;
uint32_t unicode_length;
Word(const string& w, uint32_t o)
: word(w), offset(o) {
}
Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
: word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
~Word() {
word = std::move("");
}
}; // struct Word
#endif // COMMONSTRUCT_H
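
A small sketch (not part of the diff) of how the two structs above are typically consumed on the caller side:

#include <iostream>
#include "common-struct.h"

void printResults(const vector<KeyWord> &keywords, const vector<Word> &words) {
    for (const KeyWord &kw : keywords) {
        // offsets holds the Unicode offset of every occurrence of the keyword
        std::cout << kw.word << " hits=" << kw.offsets.size()
                  << " weight=" << kw.weight << std::endl;
    }
    for (const Word &w : words) {
        // offset is the byte offset; unicode_offset/unicode_length are in code points
        std::cout << w.word << " @byte " << w.offset << " @rune "
                  << w.unicode_offset << "+" << w.unicode_length << std::endl;
    }
}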


@@ -1,634 +0,0 @@
#pragma once
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <QDebug>
#include <algorithm>
#include <utility>
#include "limonp/Md5.hpp"
#include "Unicode.hpp"
#include "darts.h"
namespace cppjieba {
using std::pair;
struct DatElement {
string word;
string tag;
double weight = 0;
bool operator < (const DatElement & b) const {
if (word == b.word) {
return this->weight > b.weight;
}
return this->word < b.word;
}
};
struct IdfElement {
string word;
double idf = 0;
bool operator < (const IdfElement & b) const {
if (word == b.word) {
return this->idf > b.idf;
}
return this->word < b.word;
}
};
struct PinYinElement
{
string word;
string tag;
bool operator < (const DatElement & b) const {
return this->word < b.word;
}
};
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
}
struct DatMemElem {
double weight = 0.0;
char tag[8] = {};
void SetTag(const string & str) {
memset(&tag[0], 0, sizeof(tag));
strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
}
string GetTag() const {
return &tag[0];
}
};
struct PinYinMemElem {
char tag[6] = {};
void SetTag(const string & str) {
memset(&tag[0], 0, sizeof(tag));
strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
}
string GetTag() const {
return &tag[0];
}
};
inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
}
struct DatDag {
limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
double max_weight;
int max_next;
};
typedef Darts::DoubleArray JiebaDAT;
struct CacheFileHeader {
char md5_hex[32] = {};
double min_weight = 0;
uint32_t elements_num = 0;
uint32_t dat_size = 0;
};
static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
class DatTrie {
public:
DatTrie() {}
~DatTrie() {
::munmap(mmap_addr_, mmap_length_);
mmap_addr_ = nullptr;
mmap_length_ = 0;
::close(mmap_fd_);
mmap_fd_ = -1;
}
const DatMemElem * Find(const string & key) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return nullptr;
}
return &elements_ptr_[ find_result.value ];
}
const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return -1;
}
return idf_elements_ptr_[ find_result.value ];
}
const PinYinMemElem * PinYinFind(const string & key) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return nullptr;
}
return &pinyin_elements_ptr_[ find_result.value ];
}
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<struct DatDag>&res, size_t max_word_len) const {
res.clear();
res.resize(end - begin);
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {};
for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
if (1 == char_num) {
res[i].nexts[0].second = pValue;
continue;
}
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
}
begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
}
}
/*
void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<struct DatDag>&res, size_t max_word_len) const {
res.clear();
res.resize(end - begin);
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {};
size_t str_size = end - begin;
for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
begin_pos -= (end - i - 1)->len;
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - i, nullptr));
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
if (1 == char_num) {
res[str_size - i - 1].nexts[0].second = pValue;
continue;
}
res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - 1 - i + char_num, pValue));
}
}
}*/
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<WordRange>& words, size_t max_word_len) const {
string text_str;
EncodeRunesToString(begin, end, text_str);
static const size_t max_num = 128;
JiebaDAT::result_pair_type result_pairs[max_num] = {}; // holds the dictionary lookup results
size_t str_size = end - begin;
double max_weight[str_size]; // best accumulated weight of the path from each position, computed right to left
for (size_t i = 0; i<str_size; i++) {
max_weight[i] = -3.14e+100;
}
int max_next[str_size]; // the cut positions chosen by the dynamic programme
memset(max_next,-1,str_size);
double val(0);
for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
size_t nextPos = str_size - i; // iterate from the end of the sentence backwards
begin_pos -= (end - i - 1)->len;
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
if (0 == num_results) { // not in the dictionary: treat the character as a word on its own
val = min_weight_;
if (nextPos < str_size) {
val += max_weight[nextPos];
}
if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos;
}
} else { // in the dictionary: update the max-probability path from every matched candidate
for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx];
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue;
}
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
if (char_num > max_word_len) {
continue;
}
auto pValue = &elements_ptr_[match.value];
val = pValue->weight;
if (1 == char_num) {
if (nextPos < str_size) {
val += max_weight[nextPos];
}
if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos;
}
} else {
if (nextPos - 1 + char_num < str_size) {
val += max_weight[nextPos - 1 + char_num];
}
if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) {
max_weight[nextPos - 1] = val;
max_next[nextPos - 1] = nextPos - 1 + char_num;
}
}
}
}
}
for (size_t i = 0; i < str_size;) { // read the DP result off into word ranges
assert(max_next[i] > i);
assert(max_next[i] <= str_size);
WordRange wr(begin + i, begin + max_next[i] - 1);
words.push_back(wr);
i = max_next[i];
}
}
double GetMinWeight() const {
return min_weight_;
}
void SetMinWeight(double d) {
min_weight_ = d ;
}
bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
BuildDatCache(elements, dat_cache_file, md5);
return InitAttachDat(dat_cache_file, md5);
}
bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
BuildDatCache(elements, dat_cache_file, md5);
return InitIdfAttachDat(dat_cache_file, md5);
}
bool InitBuildDat(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
BuildDatCache(elements, dat_cache_file, md5);
return InitPinYinAttachDat(dat_cache_file, md5);
}
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem) + header.dat_size * dat_.unit_size());
elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size());
idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
bool InitPinYinAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(PinYinMemElem) + header.dat_size * dat_.unit_size());
pinyin_elements_ptr_ = (const PinYinMemElem *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(PinYinMemElem) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
private:
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
std::sort(elements.begin(), elements.end());
vector<const char*> keys_ptr_vec;
vector<int> values_vec;
vector<DatMemElem> mem_elem_vec;
keys_ptr_vec.reserve(elements.size());
values_vec.reserve(elements.size());
mem_elem_vec.reserve(elements.size());
CacheFileHeader header;
header.min_weight = min_weight_;
assert(sizeof(header.md5_hex) == md5.size());
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
for (size_t i = 0; i < elements.size(); ++i) {
keys_ptr_vec.push_back(elements[i].word.data());
values_vec.push_back(i);
mem_elem_vec.push_back(DatMemElem());
auto & mem_elem = mem_elem_vec.back();
mem_elem.weight = elements[i].weight;
mem_elem.SetTag(elements[i].tag);
}
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
assert(0 == ret);
header.elements_num = mem_elem_vec.size();
header.dat_size = dat_.size();
{
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
::umask(S_IWGRP | S_IWOTH);
//const int fd =::mkstemp(&tmp_filepath[0]);
// the original mkstemp usage was wrong, fixed --jxx20210519
const int fd =::mkstemp((char *)tmp_filepath.data());
qDebug() << "mkstemp :" << errno << tmp_filepath.data();
assert(fd >= 0);
::fchmod(fd, 0644);
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
::close(fd);
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
assert(0 == rename_ret);
}
}
void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
std::sort(elements.begin(), elements.end());
vector<const char*> keys_ptr_vec;
vector<int> values_vec;
vector<double> mem_elem_vec;
keys_ptr_vec.reserve(elements.size());
values_vec.reserve(elements.size());
mem_elem_vec.reserve(elements.size());
CacheFileHeader header;
header.min_weight = min_weight_;
assert(sizeof(header.md5_hex) == md5.size());
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
for (size_t i = 0; i < elements.size(); ++i) {
keys_ptr_vec.push_back(elements[i].word.data());
values_vec.push_back(i);
mem_elem_vec.push_back(elements[i].idf);
}
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
assert(0 == ret);
header.elements_num = mem_elem_vec.size();
header.dat_size = dat_.size();
{
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
::umask(S_IWGRP | S_IWOTH);
//const int fd =::mkstemp(&tmp_filepath[0]);
// the original mkstemp usage was wrong, fixed --jxx20210519
const int fd =::mkstemp((char *)tmp_filepath.data());
qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
assert(fd >= 0);
::fchmod(fd, 0644);
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size());
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size());
::close(fd);
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
assert(0 == rename_ret);
}
}
void BuildDatCache(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
//std::sort(elements.begin(), elements.end());
vector<const char*> keys_ptr_vec;
vector<int> values_vec;
vector<PinYinMemElem> mem_elem_vec;
keys_ptr_vec.reserve(elements.size());
values_vec.reserve(elements.size());
mem_elem_vec.reserve(elements.size());
CacheFileHeader header;
header.min_weight = min_weight_;
assert(sizeof(header.md5_hex) == md5.size());
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
for (size_t i = 0; i < elements.size(); ++i) {
keys_ptr_vec.push_back(elements[i].word.data());
values_vec.push_back(i);
mem_elem_vec.push_back(PinYinMemElem());
auto & mem_elem = mem_elem_vec.back();
mem_elem.SetTag(elements[i].tag);
}
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
assert(0 == ret);
header.elements_num = mem_elem_vec.size();
header.dat_size = dat_.size();
{
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
::umask(S_IWGRP | S_IWOTH);
//const int fd =::mkstemp(&tmp_filepath[0]);
const int fd =::mkstemp((char *)tmp_filepath.data());
qDebug() << "mkstemp :" << errno << tmp_filepath.data();
assert(fd >= 0);
::fchmod(fd, 0644);
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
::close(fd);
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
assert(0 == rename_ret);
}
}
DatTrie(const DatTrie &);
DatTrie &operator=(const DatTrie &);
private:
JiebaDAT dat_;
const DatMemElem * elements_ptr_ = nullptr;
const double * idf_elements_ptr_ = nullptr;
const PinYinMemElem * pinyin_elements_ptr_ = nullptr;
size_t elements_num_ = 0;
double min_weight_ = 0;
int mmap_fd_ = -1;
size_t mmap_length_ = 0;
char * mmap_addr_ = nullptr;
};
inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
limonp::MD5 md5;
const auto files = limonp::Split(files_list, "|;");
file_size_sum = 0;
for (auto const & local_path : files) {
const int fd = ::open(local_path.c_str(), O_RDONLY);
if( fd < 0){
continue;
}
auto const len = ::lseek(fd, 0, SEEK_END);
if (len > 0) {
void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
assert(MAP_FAILED != addr);
md5.Update((unsigned char *) addr, len);
file_size_sum += len;
::munmap(addr, len);
}
::close(fd);
}
md5.Final();
return string(md5.digestChars);
}
}
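
The second Find overload above folds the dictionary lookup and the max-probability path search into a single right-to-left pass. A standalone sketch of the same dynamic programme on a hand-made candidate table (positions, lengths and log-weights are invented for illustration):

#include <cstdio>
#include <vector>

int main() {
    const size_t n = 4;                              // a sentence of 4 characters
    struct Cand { size_t pos, len; double w; };      // candidate word: start, length, log-weight
    std::vector<Cand> cands = {{0,1,-9}, {0,2,-5}, {1,1,-8}, {2,1,-9}, {2,2,-4}, {3,1,-7}};

    std::vector<double> best(n + 1, -3.14e+100);     // best[i]: best weight of the suffix starting at i
    std::vector<size_t> next(n, 0);                  // next[i]: end position of the word chosen at i
    best[n] = 0;                                     // the empty suffix costs nothing
    for (size_t i = n; i-- > 0;) {                   // right to left, like the code above
        for (const Cand &c : cands) {
            if (c.pos != i) continue;
            const double val = c.w + best[i + c.len];
            if (val > best[i]) { best[i] = val; next[i] = i + c.len; }
        }
    }
    for (size_t i = 0; i < n; i = next[i])           // read the chosen segmentation off next[]
        std::printf("word covers [%zu, %zu)\n", i, next[i]);
    return 0;
}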


@@ -1,235 +0,0 @@
#pragma once
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp"
#include "Unicode.hpp"
#include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba {
using namespace limonp;
const double MIN_DOUBLE = -3.14e+100;
const double MAX_DOUBLE = 3.14e+100;
const size_t DICT_COLUMN_NUM = 3;
const char* const UNKNOWN_TAG = "";
class DictTrie {
public:
enum UserWordWeightOption {
WordWeightMin,
WordWeightMedian,
WordWeightMax,
}; // enum UserWordWeightOption
DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
}
~DictTrie() {}
const DatMemElem* Find(const string & word) const {
return dat_.Find(word);
}
void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<struct DatDag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const {
dat_.Find(begin, end, res, max_word_len);
}
void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
dat_.Find(begin, end, words, max_word_len);
}
bool IsUserDictSingleChineseWord(const Rune& word) const {
return IsIn(user_dict_single_chinese_word_, word);
}
double GetMinWeight() const {
return dat_.GetMinWeight();
}
size_t GetTotalDictSize() const {
return total_dict_size_;
}
void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
vector<string> buf;
DatElement node_info;
Split(line, buf, " ");
if (buf.size() == 0) {
return;
}
node_info.word = buf[0];
node_info.weight = user_word_default_weight_;
node_info.tag = UNKNOWN_TAG;
if (buf.size() == 2) {
node_info.tag = buf[1];
} else if (buf.size() == 3) {
if (freq_sum_ > 0.0) {
const int freq = atoi(buf[1].c_str());
node_info.weight = log(1.0 * freq / freq_sum_);
node_info.tag = buf[2];
}
}
if (saveNodeInfo) {
static_node_infos_.push_back(node_info);
}
if (Utf8CharNum(node_info.word) == 1) {
RuneArray word;
if (DecodeRunesInString(node_info.word, word)) {
user_dict_single_chinese_word_.insert(word[0]);
} else {
XLOG(ERROR) << "Decode " << node_info.word << " failed.";
}
}
}
void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
vector<string> files = limonp::Split(filePaths, "|;");
for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line;
for (; getline(ifs, line);) {
if (line.size() == 0) {
continue;
}
InserUserDictNode(line, saveNodeInfo);
}
}
}
private:
void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
UserWordWeightOption user_word_weight_opt) {
const auto dict_list = dict_path + "|" + user_dict_paths;
size_t file_size_sum = 0;
const string md5 = CalcFileListMD5(dict_list, file_size_sum);
total_dict_size_ = file_size_sum;
if (dat_cache_path.empty()) {
// if no dict cache location is given, store the cache under /tmp by default --jxx20200519
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
}
QString path = QString::fromStdString(dat_cache_path);
qDebug() << "#########Dict path:" << path;
if (dat_.InitAttachDat(dat_cache_path, md5)) {
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
return;
}
LoadDefaultDict(dict_path);
freq_sum_ = CalcFreqSum(static_node_infos_);
CalculateWeight(static_node_infos_, freq_sum_);
double min_weight = 0;
SetStaticWordWeights(user_word_weight_opt, min_weight);
dat_.SetMinWeight(min_weight);
LoadUserDict(user_dict_paths);
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
assert(build_ret);
vector<DatElement>().swap(static_node_infos_);
}
void LoadDefaultDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line;
vector<string> buf;
for (; getline(ifs, line);) {
Split(line, buf, " ");
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
DatElement node_info;
node_info.word = buf[0];
node_info.weight = atof(buf[1].c_str());
node_info.tag = buf[2];
static_node_infos_.push_back(node_info);
}
}
static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
return lhs.weight < rhs.weight;
}
void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
XCHECK(!static_node_infos_.empty());
vector<DatElement> x = static_node_infos_;
sort(x.begin(), x.end(), WeightCompare);
if(x.empty()){
return;
}
min_weight = x[0].weight;
const double max_weight_ = x[x.size() - 1].weight;
const double median_weight_ = x[x.size() / 2].weight;
switch (option) {
case WordWeightMin:
user_word_default_weight_ = min_weight;
break;
case WordWeightMedian:
user_word_default_weight_ = median_weight_;
break;
default:
user_word_default_weight_ = max_weight_;
break;
}
}
double CalcFreqSum(const vector<DatElement>& node_infos) const {
double sum = 0.0;
for (size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].weight;
}
return sum;
}
void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
for (size_t i = 0; i < node_infos.size(); i++) {
DatElement& node_info = node_infos[i];
assert(node_info.weight > 0.0);
node_info.weight = log(double(node_info.weight) / sum);
}
}
private:
vector<DatElement> static_node_infos_;
size_t total_dict_size_ = 0;
DatTrie dat_;
double freq_sum_;
double user_word_default_weight_;
unordered_set<Rune> user_dict_single_chinese_word_;
};
}
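
CalcFreqSum and CalculateWeight above turn the raw frequencies from the dictionary file into log-probabilities, so that the max-weight path search in DatTrie::Find prefers frequent words. A tiny worked sketch with invented counts:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    std::vector<double> freq = {8000, 1500, 500};    // raw counts, as read from the dict file
    double sum = 0;
    for (double f : freq) sum += f;                  // CalcFreqSum
    for (double f : freq)                            // CalculateWeight: weight = log(freq / sum)
        std::printf("freq=%.0f  weight=%f\n", f, std::log(f / sum));
    // The most frequent word gets the weight closest to zero and wins
    // when competing paths are compared.
    return 0;
}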


@@ -1,58 +0,0 @@
#pragma once
#include <algorithm>
#include <set>
#include <cassert>
#include "limonp/Logging.hpp"
#include "DictTrie.hpp"
#include "SegmentBase.hpp"
#include "Unicode.hpp"
namespace cppjieba {
class FullSegment: public SegmentBase {
public:
FullSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie) {
assert(dictTrie_);
}
~FullSegment() { }
virtual void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& res, bool, size_t) const override {
assert(dictTrie_);
vector<struct DatDag> dags;
dictTrie_->Find(begin, end, dags);
size_t max_word_end_pos = 0;
for (size_t i = 0; i < dags.size(); i++) {
for (const auto & kv : dags[i].nexts) {
const size_t nextoffset = kv.first - 1;
assert(nextoffset < dags.size());
const auto wordLen = nextoffset - i + 1;
const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i));
const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary
if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
max_word_end_pos = max(max_word_end_pos, nextoffset + 1);
}
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private:
const DictTrie* dictTrie_;
};
}


@@ -1,140 +0,0 @@
#pragma once
#include "limonp/StringUtil.hpp"
namespace cppjieba {
using namespace limonp;
typedef unordered_map<Rune, double> EmitProbMap;
struct HMMModel {
/*
* STATUS:
* 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
* */
enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
HMMModel(const string& modelPath) {
memset(startProb, 0, sizeof(startProb));
memset(transProb, 0, sizeof(transProb));
statMap[0] = 'B';
statMap[1] = 'E';
statMap[2] = 'M';
statMap[3] = 'S';
emitProbVec.push_back(&emitProbB);
emitProbVec.push_back(&emitProbE);
emitProbVec.push_back(&emitProbM);
emitProbVec.push_back(&emitProbS);
LoadModel(modelPath);
}
~HMMModel() {
}
void LoadModel(const string& filePath) {
ifstream ifile(filePath.c_str());
XCHECK(ifile.is_open()) << "open " << filePath << " failed";
string line;
vector<string> tmp;
vector<string> tmp2;
//Load startProb
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for (size_t j = 0; j < tmp.size(); j++) {
startProb[j] = atof(tmp[j].c_str());
}
//Load transProb
for (size_t i = 0; i < STATUS_SUM; i++) {
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for (size_t j = 0; j < tmp.size(); j++) {
transProb[i][j] = atof(tmp[j].c_str());
}
}
//Load emitProbB
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbB));
//Load emitProbE
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbE));
//Load emitProbM
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbM));
//Load emitProbS
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbS));
}
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
double defVal)const {
EmitProbMap::const_iterator cit = ptMp->find(key);
if (cit == ptMp->end()) {
return defVal;
}
return cit->second;
}
bool GetLine(ifstream& ifile, string& line) {
while (getline(ifile, line)) {
Trim(line);
if (line.empty()) {
continue;
}
if (StartsWith(line, "#")) {
continue;
}
return true;
}
return false;
}
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
if (line.empty()) {
return false;
}
vector<string> tmp, tmp2;
RuneArray unicode;
Split(line, tmp, ",");
for (size_t i = 0; i < tmp.size(); i++) {
Split(tmp[i], tmp2, ":");
if (2 != tmp2.size()) {
XLOG(ERROR) << "emitProb illegal.";
return false;
}
if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
XLOG(ERROR) << "TransCode failed.";
return false;
}
mp[unicode[0]] = atof(tmp2[1].c_str());
}
return true;
}
char statMap[STATUS_SUM];
double startProb[STATUS_SUM];
double transProb[STATUS_SUM][STATUS_SUM];
EmitProbMap emitProbB;
EmitProbMap emitProbE;
EmitProbMap emitProbM;
EmitProbMap emitProbS;
vector<EmitProbMap* > emitProbVec;
}; // struct HMMModel
} // namespace cppjieba


@@ -1,195 +0,0 @@
#pragma once
#include <iostream>
#include <fstream>
#include <memory.h>
#include <cassert>
#include "HMMModel.hpp"
#include "SegmentBase.hpp"
namespace cppjieba {
class HMMSegment: public SegmentBase {
public:
HMMSegment(const HMMModel* model)
: model_(model) {
}
~HMMSegment() { }
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
size_t) const override {
RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right = begin;
while (right != end) {
if (right->rune < 0x80) { // ASCII
if (left != right) {
InternalCut(left, right, res);
}
left = right;
do {
right = SequentialLetterRule(left, end); // returns left if this is not an English letter, otherwise the first position after the letter run
if (right != left) {
break;
}
right = NumbersRule(left, end); // returns left if this is not a digit, otherwise the first position after the number run
if (right != left) {
break;
}
right ++;
} while (false);
WordRange wr(left, right - 1);
res.push_back(wr);
left = right;
} else {
right++;
}
}
if (left != right) {
InternalCut(left, right, res);
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private:
// sequential letters rule
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end) const {
Rune x = begin->rune;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++;
} else {
return begin;
}
while (begin != end) {
x = begin->rune;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
begin ++;
} else {
break;
}
}
return begin;
}
//
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune;
if ('0' <= x && x <= '9') {
begin ++;
} else {
return begin;
}
while (begin != end) {
x = begin->rune;
if (('0' <= x && x <= '9') || x == '.') {
begin++;
} else {
break;
}
}
return begin;
}
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
vector<size_t> status;
Viterbi(begin, end, status);
RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right;
for (size_t i = 0; i < status.size(); i++) {
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
right = begin + i + 1;
WordRange wr(left, right - 1);
res.push_back(wr);
left = right;
}
}
}
void Viterbi(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<size_t>& status) const {
size_t Y = HMMModel::STATUS_SUM;
size_t X = end - begin;
size_t XYSize = X * Y;
size_t now, old, stat;
double tmp, endE, endS;
//vector<int> path(XYSize);
//vector<double> weight(XYSize);
int path[XYSize];
double weight[XYSize];
//start
for (size_t y = 0; y < Y; y++) {
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
path[0 + y * X] = -1;
}
double emitProb;
for (size_t x = 1; x < X; x++) {
for (size_t y = 0; y < Y; y++) {
now = x + y * X;
weight[now] = MIN_DOUBLE;
path[now] = HMMModel::E; // warning
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
for (size_t preY = 0; preY < Y; preY++) {
old = x - 1 + preY * X;
tmp = weight[old] + model_->transProb[preY][y] + emitProb;
if (tmp > weight[now]) {
weight[now] = tmp;
path[now] = preY;
}
}
}
}
endE = weight[X - 1 + HMMModel::E * X];
endS = weight[X - 1 + HMMModel::S * X];
stat = 0;
if (endE >= endS) {
stat = HMMModel::E;
} else {
stat = HMMModel::S;
}
status.resize(X);
for (int x = X - 1 ; x >= 0; x--) {
status[x] = stat;
stat = path[x + stat * X];
}
}
const HMMModel* model_;
}; // class HMMSegment
} // namespace cppjieba


@@ -1,117 +0,0 @@
#pragma once
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp"
#include "Unicode.hpp"
#include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba {
using namespace limonp;
const size_t IDF_COLUMN_NUM = 2;
class IdfTrie {
public:
enum UserWordWeightOption {
WordWeightMin,
WordWeightMedian,
WordWeightMax,
}; // enum UserWordWeightOption
IdfTrie(const string& dict_path, const string & dat_cache_path = "",
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, dat_cache_path, user_word_weight_opt);
}
~IdfTrie() {}
double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
return dat_.Find(word, length, node_pos);
}
size_t GetTotalDictSize() const {
return total_dict_size_;
}
private:
void Init(const string& dict_path, string dat_cache_path,
UserWordWeightOption user_word_weight_opt) {
size_t file_size_sum = 0;
const string md5 = CalcFileListMD5(dict_path, file_size_sum);
total_dict_size_ = file_size_sum;
if (dat_cache_path.empty()) {
// if no dict cache location is given, store the cache under /tmp by default --jxx20200519
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
}
QString path = QString::fromStdString(dat_cache_path);
qDebug() << "#########Idf path:" << path;
if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
return;
}
LoadDefaultIdf(dict_path);
double idf_sum_ = CalcIdfSum(static_node_infos_);
assert(static_node_infos_.size());
idfAverage_ = idf_sum_ / static_node_infos_.size();
assert(idfAverage_ > 0.0);
double min_weight = 0;
dat_.SetMinWeight(min_weight);
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
assert(build_ret);
vector<IdfElement>().swap(static_node_infos_);
}
void LoadDefaultIdf(const string& filePath) {
ifstream ifs(filePath.c_str());
if(not ifs.is_open()){
return ;
}
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line;
vector<string> buf;
size_t lineno = 0;
for (; getline(ifs, line); lineno++) {
if (line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
IdfElement node_info;
node_info.word = buf[0];
node_info.idf = atof(buf[1].c_str());
static_node_infos_.push_back(node_info);
}
}
double CalcIdfSum(const vector<IdfElement>& node_infos) const {
double sum = 0.0;
for (size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].idf;
}
return sum;
}
public:
double idfAverage_;
private:
vector<IdfElement> static_node_infos_;
size_t total_dict_size_ = 0;
DatTrie dat_;
};
}


@@ -1,101 +0,0 @@
#pragma once
#include <memory>
#include "QuerySegment.hpp"
#include "KeywordExtractor.hpp"
namespace cppjieba {
class Jieba {
public:
Jieba(const string& dict_path,
const string& model_path,
const string& user_dict_path,
const string& idfPath = "",
const string& stopWordPath = "",
const string& dat_cache_path = "")
: dict_trie_(dict_path, user_dict_path, dat_cache_path),
model_(model_path),
mp_seg_(&dict_trie_),
hmm_seg_(&model_),
mix_seg_(&dict_trie_, &model_, stopWordPath),
full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_, stopWordPath),
extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
~Jieba() { }
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.CutToStr(sentence, words, hmm);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
mix_seg_.CutToWord(sentence, words, hmm);
}
void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.CutToStr(sentence, words);
}
void CutAll(const string& sentence, vector<Word>& words) const {
full_seg_.CutToWord(sentence, words);
}
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.CutToStr(sentence, words, hmm);
}
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
query_seg_.CutToWord(sentence, words, hmm);
}
void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.CutToStr(sentence, words);
}
void CutHMM(const string& sentence, vector<Word>& words) const {
hmm_seg_.CutToWord(sentence, words);
}
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
mp_seg_.CutToStr(sentence, words, false, max_word_len);
}
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
mp_seg_.CutToWord(sentence, words, false, max_word_len);
}
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
mix_seg_.Tag(sentence, words);
}
string LookupTag(const string &str) const {
return mix_seg_.LookupTag(str);
}
bool Find(const string& word) {
return nullptr != dict_trie_.Find(word);
}
void ResetSeparators(const string& s) {
//TODO
mp_seg_.ResetSeparators(s);
hmm_seg_.ResetSeparators(s);
mix_seg_.ResetSeparators(s);
full_seg_.ResetSeparators(s);
query_seg_.ResetSeparators(s);
}
const DictTrie* GetDictTrie() const {
return &dict_trie_;
}
const HMMModel* GetHMMModel() const {
return &model_;
}
private:
DictTrie dict_trie_;
HMMModel model_;
// They share the same dict trie and model
MPSegment mp_seg_;
HMMSegment hmm_seg_;
MixSegment mix_seg_;
FullSegment full_seg_;
QuerySegment query_seg_;
public:
KeywordExtractor extractor;
}; // class Jieba
} // namespace cppjieba
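
A minimal sketch of driving the facade above directly. The dictionary paths are the ones hard-coded in chinese-segmentation.cpp and are an assumption here; any valid jieba dictionary set would do:

#include <iostream>
#include <string>
#include <vector>
#include "cppjieba/Jieba.hpp"

int main() {
    cppjieba::Jieba jieba("/usr/share/ukui-search/res/dict/jieba.dict.utf8",
                          "/usr/share/ukui-search/res/dict/hmm_model.utf8",
                          "/usr/share/ukui-search/res/dict/user.dict.utf8",
                          "/usr/share/ukui-search/res/dict/idf.utf8",
                          "/usr/share/ukui-search/res/dict/stop_words.utf8",
                          "");                        // empty dat_cache_path: cache goes to /tmp

    std::vector<std::string> words;
    jieba.Cut("他来到了网易杭研大厦", words);          // Mix segmentation (MP + HMM)
    for (const auto &w : words) std::cout << w << "/";
    std::cout << std::endl;

    std::vector<std::string> search;
    jieba.CutForSearch("小明硕士毕业于中国科学院计算所", search);
    for (const auto &w : search) std::cout << w << "/";
    std::cout << std::endl;
    return 0;
}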


@@ -1,99 +0,0 @@
#pragma once
#include <cmath>
#include "MixSegment.hpp"
#include "IdfTrie.hpp"
namespace cppjieba {
using namespace limonp;
using namespace std;
/*utf8*/
class KeywordExtractor {
public:
KeywordExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& idfPath,
const string& dat_cache_path,
const string& stopWordPath)
: segment_(dictTrie, model, stopWordPath),
idf_trie_(idfPath,dat_cache_path){
}
~KeywordExtractor() {
}
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<KeyWord> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
}
}
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<KeyWord> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
}
}
void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
unordered_map<string, KeyWord> wordmap; // map from string to KeyWord; identical strings accumulate term frequency into the weight
PreFilter pre_filter(symbols_, sentence);
RuneStrArray::const_iterator null_p;
WordRange range(null_p, null_p);
bool isNull(false);
while (pre_filter.Next(range, isNull)) {
if (isNull) {
continue;
}
segment_.CutToStr(sentence, range, wordmap);
}
keywords.clear();
keywords.reserve(wordmap.size());
for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
double idf = idf_trie_.Find(itr->first);
if (-1 != idf) { // found in the IDF dictionary
itr->second.weight *= idf;
} else {
itr->second.weight *= idf_trie_.idfAverage_;
}
itr->second.word = itr->first;
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
}
private:
static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
return lhs.weight > rhs.weight;
}
MixSegment segment_;
IdfTrie idf_trie_;
unordered_set<Rune> symbols_;
}; // class KeywordExtractor
inline ostream& operator << (ostream& os, const KeyWord& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
"}";
}
} // namespace cppjieba
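
Extract above accumulates the in-sentence frequency of each word, multiplies it by the word's IDF (falling back to the average IDF for unseen words) and keeps the topN heaviest entries. A caller sketch, assuming a `jieba` instance constructed as in the previous sketch:

#include <iostream>
#include <string>
#include <vector>
#include "cppjieba/Jieba.hpp"

extern cppjieba::Jieba jieba;   // constructed elsewhere, as in the previous sketch

void printTopKeywords(const std::string &sentence) {
    std::vector<KeyWord> keywords;               // KeyWord comes from common-struct.h
    jieba.extractor.Extract(sentence, keywords, 5);
    for (const auto &kw : keywords)
        // kw.weight = term frequency of the word in the sentence * its IDF
        std::cout << kw.word << " : " << kw.weight << std::endl;
}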


@@ -1,129 +0,0 @@
#pragma once
#include <algorithm>
#include <set>
#include <cassert>
#include "limonp/Logging.hpp"
#include "DictTrie.hpp"
#include "SegmentTagged.hpp"
#include "PosTagger.hpp"
namespace cppjieba {
class MPSegment: public SegmentTagged {
public:
MPSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie) {
assert(dictTrie_);
}
~MPSegment() { }
virtual void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& words,
bool, size_t max_word_len) const override {
// vector<DatDag> dags;
// dictTrie_->Find(begin, end, dags, max_word_len); // build the DAG from the dictionary --jxx
// CalcDP(dags); // dynamic programming (DP): compute the best path over the DAG --jxx
// CutByDag(begin, end, dags, words); // cut words along the best DAG path --jxx
dictTrie_->Find(begin, end, words, max_word_len);
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
const DictTrie* GetDictTrie() const override {
return dictTrie_;
}
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
return tagger_.Tag(src, res, *this);
}
bool IsUserDictSingleChineseWord(const Rune& value) const {
return dictTrie_->IsUserDictSingleChineseWord(value);
}
private:
/*
void CalcDP(vector<DatDag>& dags) const {
double val(0);
for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
rit->max_next = -1;
rit->max_weight = MIN_DOUBLE;
for (const auto & it : rit->nexts) {
const auto nextPos = it.first;
val = dictTrie_->GetMinWeight();
if (nullptr != it.second) {
val = it.second->weight;
}
if (nextPos < dags.size()) {
val += dags[nextPos].max_weight;
}
if ((nextPos <= dags.size()) && (val > rit->max_weight)) {
rit->max_weight = val;
rit->max_next = nextPos;
}
}
}
}
*/
/* CalcDP rewritten to iterate in reverse order; preliminary testing found no problems */
void CalcDP(vector<DatDag>& dags) const {
double val(0);
size_t size = dags.size();
for (size_t i = 0; i < size; i++) {
dags[size - 1 - i].max_next = -1;
dags[size - 1 - i].max_weight = MIN_DOUBLE;
for (const auto & it : dags[size - 1 - i].nexts) {
const auto nextPos = it.first;
val = dictTrie_->GetMinWeight();
if (nullptr != it.second) {
val = it.second->weight;
}
if (nextPos < dags.size()) {
val += dags[nextPos].max_weight;
}
if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) {
dags[size - 1 - i].max_weight = val;
dags[size - 1 - i].max_next = nextPos;
}
}
}
}
void CutByDag(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator,
const vector<DatDag>& dags,
vector<WordRange>& words) const {
for (size_t i = 0; i < dags.size();) {
const auto next = dags[i].max_next;
assert(next > i);
assert(next <= dags.size());
WordRange wr(begin + i, begin + next - 1);
words.push_back(wr);
i = next;
}
}
const DictTrie* dictTrie_;
PosTagger tagger_;
}; // class MPSegment
} // namespace cppjieba


@@ -1,245 +0,0 @@
#pragma once
#include <cassert>
#include "MPSegment.hpp"
#include "HMMSegment.hpp"
#include "limonp/StringUtil.hpp"
#include "PosTagger.hpp"
namespace cppjieba {
class MixSegment: public SegmentTagged {
public:
MixSegment(const DictTrie* dictTrie,
const HMMModel* model,
const string& stopWordPath)
: mpSeg_(dictTrie), hmmSeg_(model) {
LoadStopWordDict(stopWordPath);
}
~MixSegment() {}
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
size_t) const override {
if (!hmm) {
mpSeg_.CutRuneArray(begin, end, res);
return;
}
vector<WordRange> words;
assert(end >= begin);
words.reserve(end - begin);
mpSeg_.CutRuneArray(begin, end, words);
vector<WordRange> hmmRes;
hmmRes.reserve(end - begin);
for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result
if (words[i].left != words[i].right || (words[i].left == words[i].right &&
mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
res.push_back(words[i]);
continue;
}
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i;
while (j < words.size() && words[j].left == words[j].right &&
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(hmmRes[k]);
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
// hmm is currently always enabled; change this later if it needs to be switched off --jxx20210519
// if (!hmm) {
// mpSeg_.CutRuneArray(begin, end, res);
// return;
// }
vector<WordRange> words;
assert(end >= begin);
words.reserve(end - begin);
mpSeg_.CutRuneArray(begin, end, words);
vector<WordRange> hmmRes;
hmmRes.reserve(end - begin);
for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result
if (words[i].left != words[i].right) {
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
continue;
}
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|| i == (words.size() - 1)) { // if this is the last character, push it back directly
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
continue;
}
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i + 1; // character i stands alone, is not in the user dict and is not the last character, so go on and examine character j
while (j < (words.size() - 1) && words[j].left == words[j].right &&
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
vector<WordRange> words;
vector<WordRange> hmmRes;
assert(end >= begin);
if (3 == begin->len or 4 == begin->len) {
words.reserve(end - begin);
mpSeg_.CutRuneArray(begin, end, words);
hmmRes.reserve(words.size());
} else {
hmmRes.reserve(end - begin);
}
if (words.size() != 0) { // there are Chinese segmentation results
for (size_t i = 0; i < words.size(); i++) {
string str = GetStringFromRunes(s, words[i].left, words[i].right);
if (words[i].left != words[i].right) {
if (stopWords_.find(str) != stopWords_.end()) {
continue;
}
res[str].offsets.push_back(words[i].left->offset);
res[str].weight += 1.0;
continue;
}
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|| i == (words.size() - 1)) { // if this is the last character, push it back directly
if (stopWords_.find(str) != stopWords_.end()) {
continue;
}
res[str].offsets.push_back(words[i].left->offset);
res[str].weight += 1.0;
continue;
}
                // if MP produced a single character that is not in the user dict, collect the whole run of such characters
                size_t j = i + 1; //word i is a single character that is not in the user dict and not the last word, so start checking from word j
bool isLastWordsSingle(false);
while (j <= (words.size() - 1)
&& words[j].left == words[j].right
&& !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
                    if (j == (words.size() - 1)) {//the last segmentation result is a single character
isLastWordsSingle = true;
break;
}
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
if (isLastWordsSingle) {
hmmSeg_.CutRuneArray(words[i].left, words[j].left + 1, hmmRes);
} else {
hmmSeg_.CutRuneArray(words[i].left, words[j].left, hmmRes);
}
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
continue;
}
res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
res[hmmStr].weight += 1.0;
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
if (isLastWordsSingle) {
break;
}
i = j - 1;
}
        } else {//no Chinese segmentation results
for (size_t i = 0; i < (size_t)(end - begin); i++) {
string str = s.substr((begin+i)->offset, (begin+i)->len);
res[str].offsets.push_back((begin+i)->offset);
res[str].weight += 1.0;
}
}
}
const DictTrie* GetDictTrie() const override {
return mpSeg_.GetDictTrie();
}
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
return tagger_.Tag(src, res, *this);
}
string LookupTag(const string &str) const {
return tagger_.LookupTag(str, *this);
}
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
        if(not ifs.is_open()) {
            XLOG(ERROR) << "open " << filePath << " failed";
            return;
        }
string line ;
while (getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
private:
unordered_set<string> stopWords_;
MPSegment mpSeg_;
HMMSegment hmmSeg_;
PosTagger tagger_;
}; // class MixSegment
} // namespace cppjieba
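A standalone sketch (not the cppjieba API; ASCII tokens stand in for Chinese runes, and the helper names are hypothetical) of the control flow the Cut overloads above share: words the MP pass already resolved are kept, while runs of unresolved single characters are batched and re-cut by a second pass (the HMM segmenter in the real code).

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// sketch only: length-1 tokens play the role of "single rune MP could not resolve"
std::vector<std::string> combineMpAndHmm(
        const std::vector<std::string>& mpWords,
        const std::function<std::vector<std::string>(const std::string&)>& hmmCut) {
    std::vector<std::string> res;
    for (size_t i = 0; i < mpWords.size(); ++i) {
        if (mpWords[i].size() > 1) {          // the MP pass already resolved a word
            res.push_back(mpWords[i]);
            continue;
        }
        size_t j = i;                         // collect the run of unresolved single tokens
        std::string run;
        while (j < mpWords.size() && mpWords[j].size() == 1) {
            run += mpWords[j];
            ++j;
        }
        for (const std::string& w : hmmCut(run))   // re-cut the run with the second pass
            res.push_back(w);
        i = j - 1;                            // jump over the whole run
    }
    return res;
}

int main() {
    // toy "HMM" that simply pairs characters, to make the control flow visible
    auto pairUp = [](const std::string& s) {
        std::vector<std::string> out;
        for (size_t i = 0; i < s.size(); i += 2)
            out.push_back(s.substr(i, 2));
        return out;
    };
    std::vector<std::string> mp = {"ukui", "a", "b", "c", "search"};
    for (const std::string& w : combineMpAndHmm(mp, pairUp))
        std::cout << w << "\n";               // ukui, ab, c, search
    return 0;
}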

View File

@ -1,154 +0,0 @@
#pragma once
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp"
#include "Unicode.hpp"
#include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba {
using namespace limonp;
const size_t PINYIN_COLUMN_NUM = 2;
class PinYinTrie {
public:
enum UserWordWeightOption {
WordWeightMin,
WordWeightMedian,
WordWeightMax,
}; // enum UserWordWeightOption
PinYinTrie(const string& dict_path, const string & dat_cache_path = "",
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, dat_cache_path, user_word_weight_opt);
}
~PinYinTrie() {}
int getMultiTonResults(string word, QStringList &results) {
if (qmap_chinese2pinyin.contains(QString::fromStdString(word))) {
for (auto i:qmap_chinese2pinyin[QString::fromStdString(word)])
results.push_back(i);
return 0;
}
return -1;
}
int getSingleTonResult(string word, QString &result) {
const PinYinMemElem * tmp = dat_.PinYinFind(word);
if (tmp) {
result = QString::fromStdString(tmp->GetTag());
return 0;
}
return -1;
}
bool contains(string &word) {
        if (qmap_chinese2pinyin.contains(QString::fromStdString(word))
                or dat_.PinYinFind(word))
            return true;
// if (map_chinese2pinyin.contains(word)
// or !dat_.PinYinFind(word))
// return true;
return false;
}
bool isMultiTone(const string &word) {
if (qmap_chinese2pinyin.contains(QString::fromStdString(word)))
return true;
// if (map_chinese2pinyin.contains(word))
// return true;
return false;
}
size_t GetTotalDictSize() const {
return total_dict_size_;
}
private:
void Init(const string& dict_path, string dat_cache_path,
UserWordWeightOption user_word_weight_opt) {
size_t file_size_sum = 0;
vector<PinYinElement> node_infos;
const string md5 = CalcFileListMD5(dict_path, file_size_sum);
total_dict_size_ = file_size_sum;
if (dat_cache_path.empty()) {
            //if no dat cache location is specified, store it under /tmp by default --jxx20200519
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
}
QString path = QString::fromStdString(dat_cache_path);
qDebug() << "#########PinYin path:" << path << file_size_sum;
if (dat_.InitPinYinAttachDat(dat_cache_path, md5)) {
            //polyphonic characters still require scanning the dict file
LoadDefaultPinYin(node_infos, dict_path, true);
return;
}
LoadDefaultPinYin(node_infos, dict_path, false);
double min_weight = 0;
dat_.SetMinWeight(min_weight);
const auto build_ret = dat_.InitBuildDat(node_infos, dat_cache_path, md5);
assert(build_ret);
vector<PinYinElement>().swap(node_infos);
}
void LoadDefaultPinYin(vector<PinYinElement> &node_infos, const string& filePath, bool multiFlag) {
ifstream ifs(filePath.c_str());
        if(not ifs.is_open()) {
            XLOG(ERROR) << "open " << filePath << " failed.";
            return;
        }
string line;
vector<string> buf;
size_t lineno = 0;
for (; getline(ifs, line); lineno++) {
if (line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
if (buf.size() == PINYIN_COLUMN_NUM) {
                if (multiFlag) {//not a polyphonic character
continue;
}
PinYinElement node_info;
node_info.word = buf[1];
node_info.tag = buf[0];
node_infos.push_back(node_info);
            } else {//polyphonic character
QString content = QString::fromUtf8(line.c_str());
qmap_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ");
qmap_chinese2pinyin[content.split(" ").last().trimmed()].pop_back();
/*
//std map string list
list<string> tmpList;
for(int i = 0; i < buf.size() - 1; ++i){
tmpList.push_back(buf[i]);
}
map[buf[buf.size() - 1]] = tmpList;
*/
}
}
}
private:
QMap<QString, QStringList> qmap_chinese2pinyin;
//map<string, list<string>> map_chinese2pinyin;
size_t total_dict_size_ = 0;
DatTrie dat_;
};
}
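A minimal usage sketch, assuming a Qt build; the include path and dictionary location below are placeholders, not taken from this repository (the dat cache defaults to /tmp, see Init above).

#include <QDebug>
#include <QString>
#include <QStringList>
#include <string>
#include "PinYinTrie.hpp"   // include path assumed

int main() {
    cppjieba::PinYinTrie trie("/path/to/pinyin.dict.utf8");   // placeholder path
    std::string ch = "重";                                     // a character with more than one reading
    QStringList readings;
    if (trie.getMultiTonResults(ch, readings) == 0) {
        qDebug() << "multi-tone:" << readings;
    } else {
        QString single;
        if (trie.getSingleTonResult(ch, single) == 0) {
            qDebug() << "single tone:" << single;
        }
    }
    return 0;
}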

View File

@ -1,83 +0,0 @@
#pragma once
#include "limonp/StringUtil.hpp"
#include "DictTrie.hpp"
#include "SegmentTagged.hpp"
namespace cppjieba {
using namespace limonp;
static const char* const POS_M = "m";
static const char* const POS_ENG = "eng";
static const char* const POS_X = "x";
class PosTagger {
public:
PosTagger() {
}
~PosTagger() {
}
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
vector<string> CutRes;
segment.CutToStr(src, CutRes);
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
}
return !res.empty();
}
string LookupTag(const string &str, const SegmentTagged& segment) const {
const DictTrie * dict = segment.GetDictTrie();
assert(dict != NULL);
const auto tmp = dict->Find(str);
if (tmp == NULL || tmp->GetTag().empty()) {
RuneStrArray runes;
if (!DecodeRunesInString(str, runes)) {
XLOG(ERROR) << "Decode failed.";
return POS_X;
}
return SpecialRule(runes);
} else {
return tmp->GetTag();
}
}
private:
const char* SpecialRule(const RuneStrArray& unicode) const {
size_t m = 0;
size_t eng = 0;
for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
if (unicode[i].rune < 0x80) {
eng ++;
if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
m++;
}
}
}
// ascii char is not found
if (eng == 0) {
return POS_X;
}
// all the ascii is number char
if (m == eng) {
return POS_M;
}
// the ascii chars contain english letter
return POS_ENG;
}
}; // class PosTagger
} // namespace cppjieba
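For reference, a simplified standalone restatement of the SpecialRule fallback above: all-digit ASCII maps to "m", ASCII containing letters to "eng", anything with no ASCII to "x". This sketch iterates bytes instead of runes and drops the early exit once half the token is ASCII, which does not change the result for the inputs shown.

#include <cassert>
#include <cctype>
#include <string>

// simplified sketch, not the library function
std::string specialRuleSketch(const std::string& token) {
    size_t ascii = 0, digits = 0;
    for (unsigned char c : token) {
        if (c < 0x80) {
            ++ascii;
            if (std::isdigit(c)) ++digits;
        }
    }
    if (ascii == 0) return "x";        // no ASCII at all
    if (digits == ascii) return "m";   // every ASCII char is a digit
    return "eng";                      // ASCII containing letters
}

int main() {
    assert(specialRuleSketch("2021") == "m");
    assert(specialRuleSketch("hello2") == "eng");
    assert(specialRuleSketch("你好") == "x");
    return 0;
}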

View File

@ -1,126 +0,0 @@
#pragma once
#include "limonp/Logging.hpp"
#include <unordered_set>
#include "Unicode.hpp"
namespace cppjieba {
class PreFilter {
public:
PreFilter(const std::unordered_set<Rune>& symbols,
const string& sentence)
: symbols_(symbols) {
if (!DecodeRunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. "<<sentence;
}
cursor_ = sentence_.begin();
}
~PreFilter() {
}
bool HasNext() const {
return cursor_ != sentence_.end();
}
bool Next(WordRange& wordRange) {
if (cursor_ == sentence_.end()) {
return false;
}
wordRange.left = cursor_;
        while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
cursor_++;
}
if (cursor_ == sentence_.end()) {
wordRange.right = cursor_;
return true;
}
while (++cursor_ != sentence_.end()) {
if (cursor_->rune == 0x20) {
wordRange.right = cursor_;
return true;
}
}
wordRange.right = sentence_.end();
return true;
}
bool Next(WordRange& wordRange, bool& isNull) {
isNull = false;
if (cursor_ == sentence_.end()) {
return false;
}
wordRange.left = cursor_;
if (cursor_->rune == 0x20) {
while (cursor_ != sentence_.end()) {
if (cursor_->rune != 0x20) {
if (wordRange.left == cursor_) {
cursor_ ++;
}
wordRange.right = cursor_;
isNull = true;
return true;
}
cursor_ ++;
}
            // the rest of the sentence is all spaces: return it as an empty chunk
            // instead of falling through and dereferencing the end iterator below
            wordRange.right = cursor_;
            isNull = true;
            return true;
        }
int max_num = 0;
uint32_t utf8_num = cursor_->len;
while (cursor_ != sentence_.end()) {
if (cursor_->rune == 0x20) {
if (wordRange.left == cursor_) {
cursor_ ++;
}
wordRange.right = cursor_;
return true;
}
cursor_ ++;
max_num++;
            if (max_num >= 1024 or cursor_->len != utf8_num) { //todo: avoid feeding in too many bytes at once; tentatively capped at 1024 characters
wordRange.right = cursor_;
return true;
}
}
wordRange.right = sentence_.end();
return true;
}
WordRange Next() {
WordRange range(cursor_, cursor_);
while (cursor_ != sentence_.end()) {
//if (IsIn(symbols_, cursor_->rune)) {
if (cursor_->rune == 0x20) {
if (range.left == cursor_) {
cursor_ ++;
}
range.right = cursor_;
return range;
}
cursor_ ++;
}
range.right = sentence_.end();
return range;
}
private:
RuneStrArray::const_iterator cursor_;
RuneStrArray sentence_;
const std::unordered_set<Rune>& symbols_;
}; // class PreFilter
} // namespace cppjieba
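A minimal sketch (include path assumed) that drives PreFilter the way SegmentBase::CutToStr does, pulling space-delimited rune ranges out of a sentence; the ranges are consumed as half-open [left, right) here.

#include <iostream>
#include <string>
#include <unordered_set>
#include "PreFilter.hpp"   // include path assumed

int main() {
    std::unordered_set<cppjieba::Rune> separators;   // unused by this Next overload
    std::string sentence = "ukui search 中文分词";
    cppjieba::PreFilter filter(separators, sentence);

    cppjieba::RuneStrArray::const_iterator null_p = nullptr;
    cppjieba::WordRange range(null_p, null_p);
    while (filter.Next(range)) {
        std::cout << cppjieba::EncodeRunesToString(range.left, range.right) << "\n";
    }
    return 0;
}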

View File

@ -1,83 +0,0 @@
#pragma once
#include <algorithm>
#include <set>
#include <cassert>
#include "limonp/Logging.hpp"
#include "DictTrie.hpp"
#include "SegmentBase.hpp"
#include "FullSegment.hpp"
#include "MixSegment.hpp"
#include "Unicode.hpp"
#include "DictTrie.hpp"
namespace cppjieba {
class QuerySegment: public SegmentBase {
public:
QuerySegment(const DictTrie* dictTrie,
const HMMModel* model,
const string& stopWordPath)
: mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
}
~QuerySegment() {
}
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
size_t) const override {
//use mix Cut first
vector<WordRange> mixRes;
mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
vector<WordRange> fullRes;
for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
if (mixResItr->Length() > 2) {
for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
if (trie_->Find(text) != NULL) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
res.push_back(wr);
}
}
}
if (mixResItr->Length() > 3) {
for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
if (trie_->Find(text) != NULL) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
res.push_back(wr);
}
}
}
res.push_back(*mixResItr);
}
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t) const override {
}
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t) const override {
}
private:
bool IsAllAscii(const RuneArray& s) const {
for (size_t i = 0; i < s.size(); i++) {
if (s[i] >= 0x80) {
return false;
}
}
return true;
}
MixSegment mixSeg_;
const DictTrie* trie_;
}; // QuerySegment
} // namespace cppjieba

View File

@ -1,94 +0,0 @@
#pragma once
#include "limonp/Logging.hpp"
#include "PreFilter.hpp"
#include <cassert>
namespace cppjieba {
const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
using namespace limonp;
class SegmentBase {
public:
SegmentBase() {
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
}
virtual ~SegmentBase() { }
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
size_t max_word_len) const = 0;
    //sentence-based cut methods added to reduce intermediate storage and format conversion --jxx20210517
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
size_t max_word_len) const = 0;
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
size_t max_word_len) const = 0;
    //CutToStr rewritten to simplify producing vector<string>& words and to reduce memory usage --jxx20210517
void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
words.clear();
        words.reserve(sentence.size() / 2);//todo: factor follows the upstream implementation, to be tuned
RuneStrArray::const_iterator null_p;
WordRange range(null_p, null_p);
while (pre_filter.Next(range)) {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
}
void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
}
void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while (pre_filter.HasNext()) {
auto range = pre_filter.Next();
Cut(range.left, range.right, wrs, hmm, max_word_len);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
wrs.clear();
vector<WordRange>().swap(wrs);
}
void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
Cut(begin, end, res, hmm, max_word_len);
}
bool ResetSeparators(const string& s) {
symbols_.clear();
RuneStrArray runes;
if (!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
return false;
}
for (size_t i = 0; i < runes.size(); i++) {
if (!symbols_.insert(runes[i].rune).second) {
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
return false;
}
}
return true;
}
protected:
unordered_set<Rune> symbols_;
}; // class SegmentBase
} // cppjieba

View File

@ -1,21 +0,0 @@
#pragma once
#include "SegmentBase.hpp"
namespace cppjieba {
class SegmentTagged : public SegmentBase {
public:
SegmentTagged() {
}
virtual ~SegmentTagged() {
}
virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
virtual const DictTrie* GetDictTrie() const = 0;
}; // class SegmentTagged
} // cppjieba

View File

@ -1,205 +0,0 @@
#include <cmath>
#include "Jieba.hpp"
namespace cppjieba {
using namespace limonp;
using namespace std;
class TextRankExtractor {
public:
typedef struct _Word {
string word;
vector<size_t> offsets;
double weight;
} Word; // struct Word
private:
typedef std::map<string, Word> WordMap;
class WordGraph {
private:
typedef double Score;
typedef string Node;
typedef std::set<Node> NodeSet;
typedef std::map<Node, double> Edges;
typedef std::map<Node, Edges> Graph;
//typedef std::unordered_map<Node,double> Edges;
//typedef std::unordered_map<Node,Edges> Graph;
double d;
Graph graph;
NodeSet nodeSet;
public:
WordGraph(): d(0.85) {};
WordGraph(double in_d): d(in_d) {};
        void addEdge(Node start, Node end, double weight) {
            nodeSet.insert(start);
            nodeSet.insert(end);
            graph[start][end] += weight;
            graph[end][start] += weight;
        }
void rank(WordMap &ws, size_t rankTime = 10) {
WordMap outSum;
Score wsdef, min_rank, max_rank;
if (graph.size() == 0) {
return;
}
wsdef = 1.0 / graph.size();
for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
                // edges->first: start node, edge->first: end node, edge->second: weight
ws[edges->first].word = edges->first;
ws[edges->first].weight = wsdef;
outSum[edges->first].weight = 0;
for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
outSum[edges->first].weight += edge->second;
}
}
            //sort(nodeSet.begin(),nodeSet.end()); is sorting needed here?
for (size_t i = 0; i < rankTime; i++) {
for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
double s = 0;
for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
                    // edge->first: end node, edge->second: weight
{
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
}
ws[*node].weight = (1 - d) + d * s;
}
}
min_rank = max_rank = ws.begin()->second.weight;
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
if (i->second.weight < min_rank) {
min_rank = i->second.weight;
}
if (i->second.weight > max_rank) {
max_rank = i->second.weight;
}
}
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
}
}
};
public:
TextRankExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& stopWordPath)
: segment_(dictTrie, model) {
LoadStopWordDict(stopWordPath);
}
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
LoadStopWordDict(stopWordPath);
}
~TextRankExtractor() {
}
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
}
}
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
}
}
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
vector<string> words;
segment_.CutToStr(sentence, words);
TextRankExtractor::WordGraph graph;
WordMap wordmap;
size_t offset = 0;
for (size_t i = 0; i < words.size(); i++) {
size_t t = offset;
offset += words[i].size();
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue;
}
for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
skip++;
continue;
}
graph.addEdge(words[i], words[j], 1);
}
wordmap[words[i]].offsets.push_back(t);
}
if (offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
}
graph.rank(wordmap, rankTime);
keywords.clear();
keywords.reserve(wordmap.size());
for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
}
private:
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while (getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
static bool Compare(const Word &x, const Word &y) {
return x.weight > y.weight;
}
MixSegment segment_;
unordered_set<string> stopWords_;
}; // class TextRankExtractor
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
"}";
}
} // namespace cppjieba
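The rank() loop above is the TextRank update ws[v] = (1 - d) + d * sum over neighbours u of w(u,v) / outSum(u) * ws[u]. A self-contained sketch of that update on a three-node co-occurrence graph (omitting the final min/max normalisation):

#include <cstdio>
#include <map>
#include <string>

int main() {
    const double d = 0.85;
    // symmetric co-occurrence weights, as built by addEdge()
    std::map<std::string, std::map<std::string, double>> g = {
        {"a", {{"b", 2.0}, {"c", 1.0}}},
        {"b", {{"a", 2.0}, {"c", 1.0}}},
        {"c", {{"a", 1.0}, {"b", 1.0}}},
    };
    std::map<std::string, double> ws, outSum;
    for (const auto& n : g) {
        ws[n.first] = 1.0 / g.size();              // initial score
        for (const auto& e : n.second) outSum[n.first] += e.second;
    }
    for (int iter = 0; iter < 10; ++iter) {        // rankTime iterations
        for (const auto& n : g) {
            double s = 0;
            for (const auto& e : n.second)
                s += e.second / outSum[e.first] * ws[e.first];
            ws[n.first] = (1 - d) + d * s;
        }
    }
    for (const auto& kv : ws) std::printf("%s %.3f\n", kv.first.c_str(), kv.second);
    return 0;
}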

View File

@ -1,264 +0,0 @@
#pragma once
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <ostream>
#include "limonp/LocalVector.hpp"
#include "limonp/StringUtil.hpp"
#include "common-struct.h"
namespace cppjieba {
using std::string;
using std::vector;
typedef uint32_t Rune;
//struct KeyWord {
// string word;
// vector<size_t> offsets;
// double weight;
//}; // struct Word
//struct Word {
// string word;
// uint32_t offset;
// uint32_t unicode_offset;
// uint32_t unicode_length;
// Word(const string& w, uint32_t o)
// : word(w), offset(o) {
// }
// Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
// : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
// }
//}; // struct Word
inline std::ostream& operator << (std::ostream& os, const Word& w) {
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
}
struct RuneInfo {
Rune rune;
uint32_t offset;
uint32_t len;
uint32_t unicode_offset = 0;
uint32_t unicode_length = 0;
RuneInfo(): rune(0), offset(0), len(0) {
}
RuneInfo(Rune r, uint32_t o, uint32_t l)
: rune(r), offset(o), len(l) {
}
RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
}; // struct RuneInfo
inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
}
typedef limonp::LocalVector<Rune> RuneArray;
typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
// [left, right]
struct WordRange {
RuneStrArray::const_iterator left;
RuneStrArray::const_iterator right;
WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
: left(l), right(r) {
}
size_t Length() const {
return right - left;
}
bool IsAllAscii() const {
for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
if (iter->rune >= 0x80) {
return false;
}
}
return true;
}
}; // struct WordRange
inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
arr.clear();
return limonp::Utf8ToUnicode32(s, arr);
}
inline RuneArray DecodeRunesInString(const string& s) {
RuneArray result;
DecodeRunesInString(s, result);
return result;
}
//DecodeRunesInString rewritten with the decoding inlined here, to reduce memory usage and speed up processing --jxx20210518
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
uint32_t tmp;
uint32_t offset = 0;
runes.clear();
uint32_t len(0);
for (size_t i = 0; i < s.size();) {
if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit
tmp = (uint8_t)(s.data()[i]) & 0x7f;
i++;
len = 1;
    } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxx
// 5bit, total 5bit
tmp = (uint8_t)(s.data()[i]) & 0x1f;
// 6bit, total 11bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
i += 2;
len = 2;
    } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxx
// 4bit, total 4bit
tmp = (uint8_t)(s.data()[i]) & 0x0f;
// 6bit, total 10bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
// 6bit, total 16bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
i += 3;
len = 3;
    } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxx
// 3bit, total 3bit
tmp = (uint8_t)(s.data()[i]) & 0x07;
// 6bit, total 9bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
// 6bit, total 15bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
// 6bit, total 21bit
tmp <<= 6;
tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
i += 4;
len = 4;
} else {
return false;
}
RuneInfo x(tmp, offset, len, i, 1);
runes.push_back(x);
offset += len;
}
return true;
}
class RunePtrWrapper {
public:
const RuneInfo * m_ptr = nullptr;
public:
explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
uint32_t operator *() {
return m_ptr->rune;
}
RunePtrWrapper operator ++(int) {
m_ptr ++;
return RunePtrWrapper(m_ptr);
}
bool operator !=(const RunePtrWrapper & b) const {
return this->m_ptr != b.m_ptr;
}
};
inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
string str;
RunePtrWrapper it_begin(begin), it_end(end);
limonp::Unicode32ToUtf8(it_begin, it_end, str);
return str;
}
inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
RunePtrWrapper it_begin(begin), it_end(end);
limonp::Unicode32ToUtf8(it_begin, it_end, str);
return;
}
class Unicode32Counter {
public :
size_t length = 0;
void clear() {
length = 0;
}
void push_back(uint32_t) {
++length;
}
};
inline size_t Utf8CharNum(const char * str, size_t length) {
Unicode32Counter c;
if (limonp::Utf8ToUnicode32(str, length, c)) {
return c.length;
}
return 0;
}
inline size_t Utf8CharNum(const string & str) {
return Utf8CharNum(str.data(), str.size());
}
inline bool IsSingleWord(const string& str) {
return Utf8CharNum(str) == 1;
}
// [left, right]
inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length);
}
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset);
//uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, right->offset - left->offset + right->len);
}
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
}
}
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
}
}
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
strs.resize(words.size());
for (size_t i = 0; i < words.size(); ++i) {
strs[i] = words[i].word;
}
}
const size_t MAX_WORD_LENGTH = 512;
} // namespace cppjieba
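A quick self-test of the decoder above (include path assumed): print the code point, byte offset and byte length of each rune in a mixed ASCII/Chinese string.

#include <cstdio>
#include <string>
#include "Unicode.hpp"   // include path assumed

int main() {
    cppjieba::RuneStrArray runes;
    std::string s = "ab中文";
    if (!cppjieba::DecodeRunesInString(s, runes)) {
        std::fprintf(stderr, "decode failed\n");
        return 1;
    }
    for (size_t i = 0; i < runes.size(); ++i) {
        std::printf("U+%04X offset=%u len=%u\n",
                    (unsigned)runes[i].rune, (unsigned)runes[i].offset, (unsigned)runes[i].len);
    }
    return 0;
}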

View File

@ -1,22 +0,0 @@
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/DictTrie.hpp \
$$PWD/IdfTrie.hpp \
$$PWD/PinYinTrie.hpp \
$$PWD/FullSegment.hpp \
$$PWD/HMMModel.hpp \
$$PWD/HMMSegment.hpp \
$$PWD/Jieba.hpp \
$$PWD/KeywordExtractor.hpp \
$$PWD/MPSegment.hpp \
$$PWD/MixSegment.hpp \
$$PWD/PosTagger.hpp \
$$PWD/PreFilter.hpp \
$$PWD/QuerySegment.hpp \
$$PWD/SegmentBase.hpp \
$$PWD/SegmentTagged.hpp \
$$PWD/TextRankExtractor.hpp \
$$PWD/Trie.hpp \
$$PWD/Unicode.hpp
include(limonp/limonp.pri)

File diff suppressed because it is too large

View File

@ -1,70 +0,0 @@
/************************************
* file enc : ascii
* author : wuyanyi09@gmail.com
************************************/
#ifndef LIMONP_ARGV_FUNCTS_H
#define LIMONP_ARGV_FUNCTS_H
#include <set>
#include <sstream>
#include "StringUtil.hpp"
namespace limonp {
using namespace std;
class ArgvContext {
public :
ArgvContext(int argc, const char* const * argv) {
for(int i = 0; i < argc; i++) {
if(StartsWith(argv[i], "-")) {
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
mpss_[argv[i]] = argv[i+1];
i++;
} else {
sset_.insert(argv[i]);
}
} else {
args_.push_back(argv[i]);
}
}
}
~ArgvContext() {
}
friend ostream& operator << (ostream& os, const ArgvContext& args);
string operator [](size_t i) const {
if(i < args_.size()) {
return args_[i];
}
return "";
}
string operator [](const string& key) const {
map<string, string>::const_iterator it = mpss_.find(key);
if(it != mpss_.end()) {
return it->second;
}
return "";
}
bool HasKey(const string& key) const {
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
return true;
}
return false;
}
private:
vector<string> args_;
map<string, string> mpss_;
set<string> sset_;
}; // class ArgvContext
inline ostream& operator << (ostream& os, const ArgvContext& args) {
return os<<args.args_<<args.mpss_<<args.sset_;
}
} // namespace limonp
#endif
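Usage sketch (header path assumed): positional arguments by index, "-key value" pairs by key, bare flags via HasKey().

#include <iostream>
#include "limonp/ArgvContext.hpp"   // header path assumed

int main(int argc, char** argv) {
    limonp::ArgvContext ctx(argc, argv);
    std::cout << "program : " << ctx[0] << "\n";
    std::cout << "-dict   : " << ctx["-dict"] << "\n";      // value of "-dict <path>", or ""
    std::cout << "has -v  : " << ctx.HasKey("-v") << "\n";  // bare flag
    return 0;
}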

View File

@ -1,49 +0,0 @@
#ifndef LIMONP_BLOCKINGQUEUE_HPP
#define LIMONP_BLOCKINGQUEUE_HPP
#include <queue>
#include "Condition.hpp"
namespace limonp {
template<class T>
class BlockingQueue: NonCopyable {
public:
BlockingQueue()
: mutex_(), notEmpty_(mutex_), queue_() {
}
void Push(const T& x) {
MutexLockGuard lock(mutex_);
queue_.push(x);
notEmpty_.Notify(); // Wait morphing saves us
}
T Pop() {
MutexLockGuard lock(mutex_);
// always use a while-loop, due to spurious wakeup
while (queue_.empty()) {
notEmpty_.Wait();
}
assert(!queue_.empty());
T front(queue_.front());
queue_.pop();
return front;
}
size_t Size() const {
MutexLockGuard lock(mutex_);
return queue_.size();
}
bool Empty() const {
return Size() == 0;
}
private:
mutable MutexLock mutex_;
Condition notEmpty_;
std::queue<T> queue_;
}; // class BlockingQueue
} // namespace limonp
#endif // LIMONP_BLOCKINGQUEUE_HPP
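A minimal producer/consumer sketch (header path assumed); Pop() blocks on the condition variable until the producer has pushed something.

#include <iostream>
#include <pthread.h>
#include "limonp/BlockingQueue.hpp"   // header path assumed

static limonp::BlockingQueue<int> jobs;

static void* producer(void*) {
    for (int i = 0; i < 5; ++i) {
        jobs.Push(i);
    }
    return nullptr;
}

int main() {
    pthread_t tid;
    pthread_create(&tid, nullptr, producer, nullptr);
    for (int i = 0; i < 5; ++i) {
        std::cout << jobs.Pop() << "\n";   // blocks until an element is available
    }
    pthread_join(tid, nullptr);
    return 0;
}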

View File

@ -1,67 +0,0 @@
#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
#include "BoundedQueue.hpp"
namespace limonp {
template<typename T>
class BoundedBlockingQueue : NonCopyable {
public:
explicit BoundedBlockingQueue(size_t maxSize)
: mutex_(),
notEmpty_(mutex_),
notFull_(mutex_),
queue_(maxSize) {
}
void Push(const T& x) {
MutexLockGuard lock(mutex_);
while (queue_.Full()) {
notFull_.Wait();
}
assert(!queue_.Full());
queue_.Push(x);
notEmpty_.Notify();
}
T Pop() {
MutexLockGuard lock(mutex_);
while (queue_.Empty()) {
notEmpty_.Wait();
}
assert(!queue_.Empty());
T res = queue_.Pop();
notFull_.Notify();
return res;
}
bool Empty() const {
MutexLockGuard lock(mutex_);
return queue_.Empty();
}
bool Full() const {
MutexLockGuard lock(mutex_);
return queue_.Full();
}
    size_t size() const {
        MutexLockGuard lock(mutex_);
        return queue_.Size();
    }
    size_t capacity() const {
        return queue_.Capacity();
    }
private:
mutable MutexLock mutex_;
Condition notEmpty_;
Condition notFull_;
BoundedQueue<T> queue_;
}; // class BoundedBlockingQueue
} // namespace limonp
#endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP

View File

@ -1,65 +0,0 @@
#ifndef LIMONP_BOUNDED_QUEUE_HPP
#define LIMONP_BOUNDED_QUEUE_HPP
#include <vector>
#include <fstream>
#include <cassert>
namespace limonp {
using namespace std;
template<class T>
class BoundedQueue {
public:
explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
head_ = 0;
tail_ = 0;
size_ = 0;
assert(capacity_);
}
~BoundedQueue() {
}
void Clear() {
head_ = 0;
tail_ = 0;
size_ = 0;
}
bool Empty() const {
return !size_;
}
bool Full() const {
return capacity_ == size_;
}
size_t Size() const {
return size_;
}
size_t Capacity() const {
return capacity_;
}
void Push(const T& t) {
assert(!Full());
circular_buffer_[tail_] = t;
tail_ = (tail_ + 1) % capacity_;
size_ ++;
}
T Pop() {
assert(!Empty());
size_t oldPos = head_;
head_ = (head_ + 1) % capacity_;
size_ --;
return circular_buffer_[oldPos];
}
private:
size_t head_;
size_t tail_;
size_t size_;
const size_t capacity_;
vector<T> circular_buffer_;
}; // class BoundedQueue
} // namespace limonp
#endif
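BoundedQueue is a fixed-capacity ring buffer (Push asserts when full); a small sketch of the wrap-around behaviour, header path assumed.

#include <cassert>
#include "limonp/BoundedQueue.hpp"   // header path assumed

int main() {
    limonp::BoundedQueue<int> q(2);
    q.Push(1);
    q.Push(2);
    assert(q.Full());
    assert(q.Pop() == 1);
    q.Push(3);               // reuses the freed slot; head/tail wrap modulo capacity
    assert(q.Pop() == 2);
    assert(q.Pop() == 3);
    assert(q.Empty());
    return 0;
}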

View File

@ -1,206 +0,0 @@
#ifndef LIMONP_CLOSURE_HPP
#define LIMONP_CLOSURE_HPP
namespace limonp {
class ClosureInterface {
public:
virtual ~ClosureInterface() {
}
virtual void Run() = 0;
};
template <class Funct>
class Closure0: public ClosureInterface {
public:
Closure0(Funct fun) {
fun_ = fun;
}
virtual ~Closure0() {
}
virtual void Run() {
(*fun_)();
}
private:
Funct fun_;
};
template <class Funct, class Arg1>
class Closure1: public ClosureInterface {
public:
Closure1(Funct fun, Arg1 arg1) {
fun_ = fun;
arg1_ = arg1;
}
virtual ~Closure1() {
}
virtual void Run() {
(*fun_)(arg1_);
}
private:
Funct fun_;
Arg1 arg1_;
};
template <class Funct, class Arg1, class Arg2>
class Closure2: public ClosureInterface {
public:
Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
}
virtual ~Closure2() {
}
virtual void Run() {
(*fun_)(arg1_, arg2_);
}
private:
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
};
template <class Funct, class Arg1, class Arg2, class Arg3>
class Closure3: public ClosureInterface {
public:
Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
arg3_ = arg3;
}
virtual ~Closure3() {
}
virtual void Run() {
(*fun_)(arg1_, arg2_, arg3_);
}
private:
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
Arg3 arg3_;
};
template <class Obj, class Funct>
class ObjClosure0: public ClosureInterface {
public:
ObjClosure0(Obj* p, Funct fun) {
p_ = p;
fun_ = fun;
}
virtual ~ObjClosure0() {
}
virtual void Run() {
(p_->*fun_)();
}
private:
Obj* p_;
Funct fun_;
};
template <class Obj, class Funct, class Arg1>
class ObjClosure1: public ClosureInterface {
public:
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
}
virtual ~ObjClosure1() {
}
virtual void Run() {
(p_->*fun_)(arg1_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
};
template <class Obj, class Funct, class Arg1, class Arg2>
class ObjClosure2: public ClosureInterface {
public:
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
}
virtual ~ObjClosure2() {
}
virtual void Run() {
(p_->*fun_)(arg1_, arg2_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
};
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
class ObjClosure3: public ClosureInterface {
public:
ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
arg3_ = arg3;
}
virtual ~ObjClosure3() {
}
virtual void Run() {
(p_->*fun_)(arg1_, arg2_, arg3_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
Arg3 arg3_;
};
template<class R>
ClosureInterface* NewClosure(R (*fun)()) {
return new Closure0<R (*)()>(fun);
}
template<class R, class Arg1>
ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
}
template<class R, class Arg1, class Arg2>
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
}
template<class R, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
}
template<class R, class Obj>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
}
template<class R, class Obj, class Arg1>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
}
template<class R, class Obj, class Arg1, class Arg2>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
}
template<class R, class Obj, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
}
} // namespace limonp
#endif // LIMONP_CLOSURE_HPP
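NewClosure wraps free functions or member functions (up to three arguments) behind ClosureInterface; the caller owns the returned pointer. A small sketch, header path assumed.

#include <iostream>
#include "limonp/Closure.hpp"   // header path assumed

static void hello(int n) {
    std::cout << "hello " << n << "\n";
}

struct Greeter {
    void greet(int n) { std::cout << "greet " << n << "\n"; }
};

int main() {
    limonp::ClosureInterface* c1 = limonp::NewClosure(hello, 42);                 // free function
    Greeter g;
    limonp::ClosureInterface* c2 = limonp::NewClosure(&g, &Greeter::greet, 7);    // member function
    c1->Run();
    c2->Run();
    delete c1;
    delete c2;
    return 0;
}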

View File

@ -1,31 +0,0 @@
#ifndef LIMONP_COLOR_PRINT_HPP
#define LIMONP_COLOR_PRINT_HPP
#include <string>
#include <stdarg.h>
namespace limonp {
using std::string;
enum Color {
BLACK = 30,
RED,
GREEN,
YELLOW,
BLUE,
PURPLE
}; // enum Color
static void ColorPrintln(enum Color color, const char * fmt, ...) {
va_list ap;
printf("\033[0;%dm", color);
va_start(ap, fmt);
vprintf(fmt, ap);
va_end(ap);
printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
}
} // namespace limonp
#endif // LIMONP_COLOR_PRINT_HPP

View File

@ -1,38 +0,0 @@
#ifndef LIMONP_CONDITION_HPP
#define LIMONP_CONDITION_HPP
#include "MutexLock.hpp"
namespace limonp {
class Condition : NonCopyable {
public:
explicit Condition(MutexLock& mutex)
: mutex_(mutex) {
XCHECK(!pthread_cond_init(&pcond_, NULL));
}
~Condition() {
XCHECK(!pthread_cond_destroy(&pcond_));
}
void Wait() {
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
}
void Notify() {
XCHECK(!pthread_cond_signal(&pcond_));
}
void NotifyAll() {
XCHECK(!pthread_cond_broadcast(&pcond_));
}
private:
MutexLock& mutex_;
pthread_cond_t pcond_;
}; // class Condition
} // namespace limonp
#endif // LIMONP_CONDITION_HPP

View File

@ -1,103 +0,0 @@
/************************************
* file enc : utf8
* author : wuyanyi09@gmail.com
************************************/
#ifndef LIMONP_CONFIG_H
#define LIMONP_CONFIG_H
#include <map>
#include <fstream>
#include <iostream>
#include <assert.h>
#include "StringUtil.hpp"
namespace limonp {
using namespace std;
class Config {
public:
explicit Config(const string& filePath) {
LoadFile(filePath);
}
operator bool () {
return !map_.empty();
}
string Get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second;
}
return defaultvalue;
}
int Get(const string& key, int defaultvalue) const {
string str = Get(key, "");
if("" == str) {
return defaultvalue;
}
return atoi(str.c_str());
}
const char* operator [] (const char* key) const {
if(NULL == key) {
return NULL;
}
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second.c_str();
}
return NULL;
}
string GetConfigInfo() const {
string res;
res << *this;
return res;
}
private:
void LoadFile(const string& filePath) {
ifstream ifs(filePath.c_str());
assert(ifs);
string line;
vector<string> vecBuf;
size_t lineno = 0;
while(getline(ifs, line)) {
lineno ++;
Trim(line);
if(line.empty() || StartsWith(line, "#")) {
continue;
}
vecBuf.clear();
Split(line, vecBuf, "=");
if(2 != vecBuf.size()) {
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
assert(false);
continue;
}
string& key = vecBuf[0];
string& value = vecBuf[1];
Trim(key);
Trim(value);
if(!map_.insert(make_pair(key, value)).second) {
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
assert(false);
continue;
}
}
ifs.close();
}
friend ostream& operator << (ostream& os, const Config& config);
map<string, string> map_;
}; // class Config
inline ostream& operator << (ostream& os, const Config& config) {
return os << config.map_;
}
} // namespace limonp
#endif // LIMONP_CONFIG_H
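Usage sketch (header path assumed; the file must exist because LoadFile asserts on open failure): Config reads "key = value" lines, skipping blanks and '#' comments.

#include <iostream>
#include "limonp/Config.hpp"   // header path assumed

int main() {
    // demo.conf is a placeholder, e.g. a file containing:  dict_dir = /usr/share/dict
    limonp::Config conf("demo.conf");
    if (!conf) {
        std::cerr << "no entries loaded\n";
        return 1;
    }
    std::cout << conf.Get("dict_dir", "/tmp") << "\n";   // falls back to "/tmp" when the key is missing
    return 0;
}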

View File

@ -1,74 +0,0 @@
#ifndef LIMONP_FILELOCK_HPP
#define LIMONP_FILELOCK_HPP
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <string>
#include <string.h>
#include <assert.h>
namespace limonp {
using std::string;
class FileLock {
public:
FileLock() : fd_(-1), ok_(true) {
}
~FileLock() {
if(fd_ > 0) {
Close();
}
}
void Open(const string& fname) {
assert(fd_ == -1);
fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
if(fd_ < 0) {
ok_ = false;
err_ = strerror(errno);
}
}
void Close() {
::close(fd_);
}
void Lock() {
if(LockOrUnlock(fd_, true) < 0) {
ok_ = false;
err_ = strerror(errno);
}
}
void UnLock() {
if(LockOrUnlock(fd_, false) < 0) {
ok_ = false;
err_ = strerror(errno);
}
}
bool Ok() const {
return ok_;
}
string Error() const {
return err_;
}
private:
static int LockOrUnlock(int fd, bool lock) {
errno = 0;
struct flock f;
memset(&f, 0, sizeof(f));
f.l_type = (lock ? F_WRLCK : F_UNLCK);
f.l_whence = SEEK_SET;
f.l_start = 0;
f.l_len = 0; // Lock/unlock entire file
return fcntl(fd, F_SETLK, &f);
}
int fd_;
bool ok_;
string err_;
}; // class FileLock
}// namespace limonp
#endif // LIMONP_FILELOCK_HPP

View File

@ -1,7 +0,0 @@
#ifndef LIMONP_FORCE_PUBLIC_H
#define LIMONP_FORCE_PUBLIC_H
#define private public
#define protected public
#endif // LIMONP_FORCE_PUBLIC_H

View File

@ -1,142 +0,0 @@
#ifndef LIMONP_LOCAL_VECTOR_HPP
#define LIMONP_LOCAL_VECTOR_HPP
#include <iostream>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
namespace limonp {
using namespace std;
/*
* LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
* LocalVector<T> is simple and not well-tested.
*/
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
template <class T>
class LocalVector {
public:
typedef const T* const_iterator ;
typedef T value_type;
typedef size_t size_type;
private:
T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
T * ptr_;
size_t size_;
size_t capacity_;
public:
LocalVector() {
init_();
};
LocalVector(const LocalVector<T>& vec) {
init_();
*this = vec;
}
LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
init_();
while(begin != end) {
push_back(*begin++);
}
}
LocalVector(size_t size, const T& t) { // TODO: make it faster
init_();
while(size--) {
push_back(t);
}
}
~LocalVector() {
if(ptr_ != buffer_) {
free(ptr_);
}
};
public:
LocalVector<T>& operator = (const LocalVector<T>& vec) {
if(this == &vec){
return *this;
}
clear();
size_ = vec.size();
capacity_ = vec.capacity();
if(vec.buffer_ == vec.ptr_) {
memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
ptr_ = buffer_;
} else {
ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
assert(ptr_);
memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
}
return *this;
}
private:
void init_() {
ptr_ = buffer_;
size_ = 0;
capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
}
public:
T& operator [] (size_t i) {
return ptr_[i];
}
const T& operator [] (size_t i) const {
return ptr_[i];
}
void push_back(const T& t) {
if(size_ == capacity_) {
assert(capacity_);
reserve(capacity_ * 2);
}
ptr_[size_ ++ ] = t;
}
void reserve(size_t size) {
if(size <= capacity_) {
return;
}
T * next = (T*)malloc(sizeof(T) * size);
assert(next);
T * old = ptr_;
ptr_ = next;
memcpy(ptr_, old, sizeof(T) * capacity_);
capacity_ = size;
if(old != buffer_) {
free(old);
}
}
bool empty() const {
return 0 == size();
}
size_t size() const {
return size_;
}
size_t capacity() const {
return capacity_;
}
const_iterator begin() const {
return ptr_;
}
const_iterator end() const {
return ptr_ + size_;
}
void clear() {
if(ptr_ != buffer_) {
free(ptr_);
}
init_();
}
};
template <class T>
ostream & operator << (ostream& os, const LocalVector<T>& vec) {
if(vec.empty()) {
return os << "[]";
}
os<<"[\""<<vec[0];
for(size_t i = 1; i < vec.size(); i++) {
os<<"\", \""<<vec[i];
}
os<<"\"]";
return os;
}
}
#endif
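LocalVector keeps the first 16 elements in an inline buffer and only calls malloc once it grows past that, which is why it is restricted to primitive element types; a tiny sketch, header path assumed.

#include <cassert>
#include "limonp/LocalVector.hpp"   // header path assumed

int main() {
    limonp::LocalVector<int> v;
    for (int i = 0; i < 20; ++i) {
        v.push_back(i);              // crosses the 16-element inline buffer at i == 16
    }
    assert(v.size() == 20);
    assert(v[16] == 16);
    return 0;
}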

View File

@ -1,77 +0,0 @@
#ifndef LIMONP_LOGGING_HPP
#define LIMONP_LOGGING_HPP
#include <sstream>
#include <iostream>
#include <cassert>
#include <cstdlib>
#include <ctime>
#ifdef XLOG
#error "XLOG has been defined already"
#endif // XLOG
#ifdef XCHECK
#error "XCHECK has been defined already"
#endif // XCHECK
#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
#define XCHECK(exp) if(!(exp)) XLOG(FATAL) << "exp: ["#exp << "] false. "
namespace limonp {
enum {
LL_DEBUG = 0,
LL_INFO = 1,
LL_WARNING = 2,
LL_ERROR = 3,
LL_FATAL = 4,
}; // enum
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
class Logger {
public:
Logger(size_t level, const char* filename, int lineno)
: level_(level) {
#ifdef LOGGING_LEVEL
if (level_ < LOGGING_LEVEL) {
return;
}
#endif
        assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
char buf[32];
time_t now;
time(&now);
struct tm result;
localtime_r(&now, &result);
strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
stream_ << buf
<< " " << filename
<< ":" << lineno
<< " " << LOG_LEVEL_ARRAY[level_]
<< " ";
}
~Logger() {
#ifdef LOGGING_LEVEL
if (level_ < LOGGING_LEVEL) {
return;
}
#endif
std::cerr << stream_.str() << std::endl;
if (level_ == LL_FATAL) {
abort();
}
}
std::ostream& Stream() {
return stream_;
}
private:
std::ostringstream stream_;
size_t level_;
}; // class Logger
} // namespace limonp
#endif // LIMONP_LOGGING_HPP
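XLOG streams a timestamped line to stderr and aborts on FATAL; XCHECK logs FATAL when its condition is false. A short sketch, header path assumed.

#include "limonp/Logging.hpp"   // header path assumed

int main(int argc, char**) {
    XLOG(INFO) << "starting with " << argc << " argument(s)";
    XCHECK(argc >= 1) << "argc must be positive";   // extra context is appended to the FATAL line
    return 0;
}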

View File

@ -1,415 +0,0 @@
/****************************************************************************
**Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991
** 2020, KylinSoft Co., Ltd.
**All rights reserved.
**
**License to copy and use this software is granted provided that it
**is identified as the "RSA Data Security, Inc. MD5 Message-Digest
**Algorithm" in all material mentioning or referencing this software
**or this function.
**
**License is also granted to make and use derivative works provided
**that such works are identified as "derived from the RSA Data
**Security, Inc. MD5 Message-Digest Algorithm" in all material
**mentioning or referencing the derived work.
**
**RSA Data Security, Inc. makes no representations concerning either
**the merchantability of this software or the suitability of this
**software for any particular purpose. It is provided "as is"
**without express or implied warranty of any kind.
**
**These notices must be retained in any copies of any part of this
**documentation and/or software.
**
**
**
**The original md5 implementation avoids external libraries.
**This version has dependency on stdio.h for file input and
**string.h for memcpy.
**
****************************************************************************/
#ifndef __MD5_H__
#define __MD5_H__
#include <cstdio>
#include <cstring>
#include <iostream>
namespace limonp {
//#pragma region MD5 defines
// Constants for MD5Transform routine.
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21
// F, G, H and I are basic MD5 functions.
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
// ROTATE_LEFT rotates x left n bits.
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
// Rotation is separate from addition to prevent recomputation.
#define FF(a, b, c, d, x, s, ac) { \
(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
//#pragma endregion
typedef unsigned char BYTE ;
// POINTER defines a generic pointer type
typedef unsigned char *POINTER;
// UINT2 defines a two byte word
typedef unsigned short int UINT2;
// UINT4 defines a four byte word
typedef unsigned int UINT4;
static unsigned char PADDING[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
// convenient object that wraps
// the C-functions for use in C++ only
class MD5 {
private:
struct __context_t {
UINT4 state[4]; /* state (ABCD) */
UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
unsigned char buffer[64]; /* input buffer */
} context ;
//#pragma region static helper functions
// The core of the MD5 algorithm is here.
// MD5 basic transformation. Transforms state based on block.
static void MD5Transform(UINT4 state[4], unsigned char block[64]) {
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
Decode(x, block, 64);
/* Round 1 */
FF(a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
FF(d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
FF(c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
FF(b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
FF(a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
FF(d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
FF(c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
FF(b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
FF(a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
FF(d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 2 */
GG(a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
GG(d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
GG(a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
GG(a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG(c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
GG(b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
GG(c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 3 */
HH(a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
HH(d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH(a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH(d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
HH(c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
HH(b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
HH(a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH(b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
/* Round 4 */
II(a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
II(d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II(b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II(d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II(b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
II(a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II(c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II(a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
II(b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
// Zeroize sensitive information.
memset((POINTER)x, 0, sizeof(x));
}
// Encodes input (UINT4) into output (unsigned char). Assumes len is
// a multiple of 4.
static void Encode(unsigned char *output, UINT4 *input, unsigned int len) {
unsigned int i, j;
for(i = 0, j = 0; j < len; i++, j += 4) {
output[j] = (unsigned char)(input[i] & 0xff);
output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
}
}
// Decodes input (unsigned char) into output (UINT4). Assumes len is
// a multiple of 4.
static void Decode(UINT4 *output, unsigned char *input, unsigned int len) {
unsigned int i, j;
for(i = 0, j = 0; j < len; i++, j += 4)
output[i] = ((UINT4)input[j]) | (((UINT4)input[j + 1]) << 8) |
(((UINT4)input[j + 2]) << 16) | (((UINT4)input[j + 3]) << 24);
}
//#pragma endregion
public:
// MAIN FUNCTIONS
MD5() {
Init() ;
}
// MD5 initialization. Begins an MD5 operation, writing a new context.
void Init() {
context.count[0] = context.count[1] = 0;
// Load magic initialization constants.
context.state[0] = 0x67452301;
context.state[1] = 0xefcdab89;
context.state[2] = 0x98badcfe;
context.state[3] = 0x10325476;
}
// MD5 block update operation. Continues an MD5 message-digest
// operation, processing another message block, and updating the
// context.
void Update(
unsigned char *input, // input block
unsigned int inputLen) { // length of input block
unsigned int i, index, partLen;
// Compute number of bytes mod 64
index = (unsigned int)((context.count[0] >> 3) & 0x3F);
// Update number of bits
if((context.count[0] += ((UINT4)inputLen << 3))
< ((UINT4)inputLen << 3))
context.count[1]++;
context.count[1] += ((UINT4)inputLen >> 29);
partLen = 64 - index;
// Transform as many times as possible.
if(inputLen >= partLen) {
memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
MD5Transform(context.state, context.buffer);
for(i = partLen; i + 63 < inputLen; i += 64)
MD5Transform(context.state, &input[i]);
index = 0;
} else
i = 0;
/* Buffer remaining input */
memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen - i);
}
// MD5 finalization. Ends an MD5 message-digest operation, writing the
// the message digest and zeroizing the context.
// Writes to digestRaw
void Final() {
unsigned char bits[8];
unsigned int index, padLen;
// Save number of bits
Encode(bits, context.count, 8);
// Pad out to 56 mod 64.
index = (unsigned int)((context.count[0] >> 3) & 0x3f);
padLen = (index < 56) ? (56 - index) : (120 - index);
Update(PADDING, padLen);
// Append length (before padding)
Update(bits, 8);
// Store state in digest
Encode(digestRaw, context.state, 16);
// Zeroize sensitive information.
memset((POINTER)&context, 0, sizeof(context));
writeToString() ;
}
/// Buffer must be 32+1 (nul) = 33 chars long at least
void writeToString() {
int pos ;
for(pos = 0 ; pos < 16 ; pos++)
sprintf(digestChars + (pos * 2), "%02x", digestRaw[pos]) ;
}
public:
// an MD5 digest is a 16-byte number (32 hex digits)
BYTE digestRaw[ 16 ] ;
// This version of the digest is actually
// a "printf'd" version of the digest.
char digestChars[ 33 ] ;
/// Load a file from disk and digest it
// Digests a file and returns the result.
const char* digestFile(const char *filename) {
if(NULL == filename || strcmp(filename, "") == 0)
return NULL;
Init() ;
FILE *file;
unsigned char buffer[1024] ;
if((file = fopen(filename, "rb")) == NULL) {
return NULL;
}
int len;
while((len = fread(buffer, 1, 1024, file)))
Update(buffer, len) ;
Final();
fclose(file);
return digestChars ;
}
/// Digests a byte-array already in memory
const char* digestMemory(BYTE *memchunk, int len) {
if(NULL == memchunk)
return NULL;
Init() ;
Update(memchunk, len) ;
Final() ;
return digestChars ;
}
// Digests a string and prints the result.
const char* digestString(const char *string) {
if(string == NULL)
return NULL;
Init() ;
Update((unsigned char*)string, strlen(string)) ;
Final() ;
return digestChars ;
}
};
inline bool md5String(const char* str, std::string& res) {
if(NULL == str) {
res = "";
return false;
}
MD5 md5;
const char *pRes = md5.digestString(str);
if(NULL == pRes) {
res = "";
return false;
}
res = pRes;
return true;
}
inline bool md5File(const char* filepath, std::string& res) {
if(NULL == filepath || strcmp(filepath, "") == 0) {
res = "";
return false;
}
MD5 md5;
const char *pRes = md5.digestFile(filepath);
if(NULL == pRes) {
res = "";
return false;
}
res = pRes;
return true;
}
}
#endif
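Hashing a string with the md5String helper above (header path assumed):

#include <iostream>
#include <string>
#include "limonp/Md5.hpp"   // header path assumed

int main() {
    std::string digest;
    if (limonp::md5String("hello", digest)) {
        std::cout << digest << "\n";   // 32 lowercase hex characters
    }
    return 0;
}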

View File

@ -1,51 +0,0 @@
#ifndef LIMONP_MUTEX_LOCK_HPP
#define LIMONP_MUTEX_LOCK_HPP
#include <pthread.h>
#include "NonCopyable.hpp"
#include "Logging.hpp"
namespace limonp {
class MutexLock: NonCopyable {
public:
MutexLock() {
XCHECK(!pthread_mutex_init(&mutex_, NULL));
}
~MutexLock() {
XCHECK(!pthread_mutex_destroy(&mutex_));
}
pthread_mutex_t* GetPthreadMutex() {
return &mutex_;
}
private:
void Lock() {
XCHECK(!pthread_mutex_lock(&mutex_));
}
void Unlock() {
XCHECK(!pthread_mutex_unlock(&mutex_));
}
friend class MutexLockGuard;
pthread_mutex_t mutex_;
}; // class MutexLock
class MutexLockGuard: NonCopyable {
public:
explicit MutexLockGuard(MutexLock & mutex)
: mutex_(mutex) {
mutex_.Lock();
}
~MutexLockGuard() {
mutex_.Unlock();
}
private:
MutexLock & mutex_;
}; // class MutexLockGuard
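// Note: the macro below appears intended to turn the misuse "MutexLockGuard(mutex_);"
// (a nameless temporary that would unlock immediately) into a fatal runtime check.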
#define MutexLockGuard(x) XCHECK(false);
} // namespace limonp
#endif // LIMONP_MUTEX_LOCK_HPP

View File

@ -1,21 +0,0 @@
/************************************
************************************/
#ifndef LIMONP_NONCOPYABLE_H
#define LIMONP_NONCOPYABLE_H
namespace limonp {
class NonCopyable {
protected:
NonCopyable() {
}
~NonCopyable() {
}
private:
NonCopyable(const NonCopyable& );
const NonCopyable& operator=(const NonCopyable& );
}; // class NonCopyable
} // namespace limonp
#endif // LIMONP_NONCOPYABLE_H

View File

@ -1,157 +0,0 @@
#ifndef LIMONP_STD_EXTEMSION_HPP
#define LIMONP_STD_EXTEMSION_HPP
#include <map>
#ifdef __APPLE__
#include <unordered_map>
#include <unordered_set>
#elif(__cplusplus >= 201103L)
#include <unordered_map>
#include <unordered_set>
#elif defined _MSC_VER
#include <unordered_map>
#include <unordered_set>
#else
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std {
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}
#endif
#include <set>
#include <string>
#include <vector>
#include <deque>
#include <fstream>
#include <sstream>
namespace std {
template<typename T>
ostream& operator << (ostream& os, const vector<T>& v) {
if(v.empty()) {
return os << "[]";
}
os<<"["<<v[0];
for(size_t i = 1; i < v.size(); i++) {
os<<", "<<v[i];
}
os<<"]";
return os;
}
template<>
inline ostream& operator << (ostream& os, const vector<string>& v) {
if(v.empty()) {
return os << "[]";
}
os<<"[\""<<v[0];
for(size_t i = 1; i < v.size(); i++) {
os<<"\", \""<<v[i];
}
os<<"\"]";
return os;
}
template<typename T>
ostream& operator << (ostream& os, const deque<T>& dq) {
if(dq.empty()) {
return os << "[]";
}
os<<"[\""<<dq[0];
for(size_t i = 1; i < dq.size(); i++) {
os<<"\", \""<<dq[i];
}
os<<"\"]";
return os;
}
template<class T1, class T2>
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
os << pr.first << ":" << pr.second ;
return os;
}
template<class T>
string& operator << (string& str, const T& obj) {
stringstream ss;
ss << obj; // calls ostream& operator << (ostream& os, const T&) defined above
return str = ss.str();
}
template<class T1, class T2>
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
if(mp.empty()) {
os<<"{}";
return os;
}
os<<'{';
typename map<T1, T2>::const_iterator it = mp.begin();
os<<*it;
it++;
while(it != mp.end()) {
os<<", "<<*it;
it++;
}
os<<'}';
return os;
}
template<class T1, class T2>
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
if(mp.empty()) {
return os << "{}";
}
os<<'{';
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
os<<*it;
it++;
while(it != mp.end()) {
os<<", "<<*it++;
}
return os<<'}';
}
template<class T>
ostream& operator << (ostream& os, const set<T>& st) {
if(st.empty()) {
os << "{}";
return os;
}
os<<'{';
typename set<T>::const_iterator it = st.begin();
os<<*it;
it++;
while(it != st.end()) {
os<<", "<<*it;
it++;
}
os<<'}';
return os;
}
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
return contain.end() != contain.find(key);
}
template<class T>
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
}
template<class T>
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
ostreambuf_iterator<T> itr (ofs);
copy(s.begin(), s.end(), itr);
return ofs;
}
} // namespace std
#endif
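What these overloads buy you, in a minimal sketch (assuming the header installs as "StdExtension.hpp"): standard containers become directly streamable with a JSON-like layout, which the logging code in this library leans on.

#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "StdExtension.hpp"

int main() {
    std::vector<std::string> words = {"南京", "长江大桥"};
    std::map<std::string, int> freq = {{"a", 3}, {"b", 1}};
    std::cout << words << std::endl;   // prints ["南京", "长江大桥"]
    std::cout << freq << std::endl;    // prints {a:3, b:1}
    return 0;
}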

View File

@ -1,382 +0,0 @@
/************************************
* file enc : ascii
* author : wuyanyi09@gmail.com
************************************/
#ifndef LIMONP_STR_FUNCTS_H
#define LIMONP_STR_FUNCTS_H
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <memory.h>
#include <sys/types.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cctype>
#include <map>
#include <functional>
#include <locale>
#include <sstream>
#include <iterator>
#include <algorithm>
#include "StdExtension.hpp"
namespace limonp {
using namespace std;
inline string StringFormat(const char* fmt, ...) {
int size = 256;
std::string str;
va_list ap;
while (1) {
str.resize(size);
va_start(ap, fmt);
int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
va_end(ap);
if (n > -1 && n < size) {
str.resize(n);
return str;
}
if (n > -1)
size = n + 1;
else
size *= 2;
}
return str;
}
template<class T>
void Join(T begin, T end, string& res, const string& connector) {
if(begin == end) {
return;
}
stringstream ss;
ss<<*begin;
begin++;
while(begin != end) {
ss << connector << *begin;
begin ++;
}
res = ss.str();
}
template<class T>
string Join(T begin, T end, const string& connector) {
string res;
Join(begin ,end, res, connector);
return res;
}
inline string& Upper(string& str) {
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
return str;
}
inline string& Lower(string& str) {
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
return str;
}
inline bool IsSpace(unsigned c) {
// Passing a large int to isspace can crash, so a range check and cast are needed here.
return c > 0xff ? false : std::isspace(c & 0xff);
}
inline std::string& LTrim(std::string &s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
return s;
}
inline std::string& RTrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
return s;
}
inline std::string& Trim(std::string &s) {
return LTrim(RTrim(s));
}
inline std::string& LTrim(std::string & s, char x) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
return s;
}
inline std::string& RTrim(std::string & s, char x) {
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
return s;
}
inline std::string& Trim(std::string &s, char x) {
return LTrim(RTrim(s, x), x);
}
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
res.clear();
size_t Start = 0;
size_t end = 0;
string sub;
while(Start < src.size()) {
end = src.find_first_of(pattern, Start);
if(string::npos == end || res.size() >= maxsplit) {
sub = src.substr(Start);
res.push_back(sub);
return;
}
sub = src.substr(Start, end - Start);
res.push_back(sub);
Start = end + 1;
}
return;
}
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
vector<string> res;
Split(src, res, pattern, maxsplit);
return res;
}
inline bool StartsWith(const string& str, const string& prefix) {
if(prefix.length() > str.length()) {
return false;
}
return 0 == str.compare(0, prefix.length(), prefix);
}
inline bool EndsWith(const string& str, const string& suffix) {
if(suffix.length() > str.length()) {
return false;
}
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
}
inline bool IsInStr(const string& str, char ch) {
return str.find(ch) != string::npos;
}
inline uint16_t TwocharToUint16(char high, char low) {
return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
}
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
if(!str) {
return false;
}
char ch1, ch2;
uint16_t tmp;
vec.clear();
for(size_t i = 0; i < len;) {
if(!(str[i] & 0x80)) { // 0xxxxxxx
vec.push_back(str[i]);
i++;
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxx
ch1 = (str[i] >> 2) & 0x07;
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
vec.push_back(tmp);
i += 3;
} else {
return false;
}
}
return true;
}
template <class Uint16Container>
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
template <class Uint32Container>
bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
uint32_t tmp;
vec.clear();
for(size_t i = 0; i < size;) {
if(!(str[i] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit
tmp = (uint8_t)(str[i]) & 0x7f;
i++;
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxx
// 5bit, total 5bit
tmp = (uint8_t)(str[i]) & 0x1f;
// 6bit, total 11bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+1]) & 0x3f;
i += 2;
} else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxx
// 4bit, total 4bit
tmp = (uint8_t)(str[i]) & 0x0f;
// 6bit, total 10bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+1]) & 0x3f;
// 6bit, total 16bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+2]) & 0x3f;
i += 3;
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 11110xxx
// 3bit, total 3bit
tmp = (uint8_t)(str[i]) & 0x07;
// 6bit, total 9bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+1]) & 0x3f;
// 6bit, total 15bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+2]) & 0x3f;
// 6bit, total 21bit
tmp <<= 6;
tmp |= (uint8_t)(str[i+3]) & 0x3f;
i += 4;
} else {
return false;
}
vec.push_back(tmp);
}
return true;
}
template <class Uint32Container>
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
return Utf8ToUnicode32(str.data(), str.size(), vec);
}
inline int UnicodeToUtf8Bytes(uint32_t ui){
if(ui <= 0x7f) {
return 1;
} else if(ui <= 0x7ff) {
return 2;
} else if(ui <= 0xffff) {
return 3;
} else {
return 4;
}
}
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
res.clear();
uint32_t ui;
while(begin != end) {
ui = *begin;
if(ui <= 0x7f) {
res += char(ui);
} else if(ui <= 0x7ff) {
res += char(((ui >> 6) & 0x1f) | 0xc0);
res += char((ui & 0x3f) | 0x80);
} else if(ui <= 0xffff) {
res += char(((ui >> 12) & 0x0f) | 0xe0);
res += char(((ui >> 6) & 0x3f) | 0x80);
res += char((ui & 0x3f) | 0x80);
} else {
res += char(((ui >> 18) & 0x03) | 0xf0);
res += char(((ui >> 12) & 0x3f) | 0x80);
res += char(((ui >> 6) & 0x3f) | 0x80);
res += char((ui & 0x3f) | 0x80);
}
begin ++;
}
}
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
res.clear();
uint16_t ui;
while(begin != end) {
ui = *begin;
if(ui <= 0x7f) {
res += char(ui);
} else if(ui <= 0x7ff) {
res += char(((ui>>6) & 0x1f) | 0xc0);
res += char((ui & 0x3f) | 0x80);
} else {
res += char(((ui >> 12) & 0x0f )| 0xe0);
res += char(((ui>>6) & 0x3f )| 0x80 );
res += char((ui & 0x3f) | 0x80);
}
begin ++;
}
}
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
vec.clear();
if(!str) {
return true;
}
size_t i = 0;
while(i < len) {
if(0 == (str[i] & 0x80)) {
vec.push_back(uint16_t(str[i]));
i++;
} else {
if(i + 1 < len) { //&& (str[i+1] & 0x80))
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
vec.push_back(tmp);
i += 2;
} else {
return false;
}
}
}
return true;
}
template <class Uint16Container>
bool GBKTrans(const string& str, Uint16Container& vec) {
return GBKTrans(str.c_str(), str.size(), vec);
}
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
res.clear();
//pair<char, char> pa;
char first, second;
while(begin != end) {
//pa = uint16ToChar2(*begin);
first = ((*begin)>>8) & 0x00ff;
second = (*begin) & 0x00ff;
if(first & 0x80) {
res += first;
res += second;
} else {
res += second;
}
begin++;
}
}
/*
* format example: "%Y-%m-%d %H:%M:%S"
*/
// inline void GetTime(const string& format, string& timeStr) {
// time_t timeNow;
// time(&timeNow);
// timeStr.resize(64);
// size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
// timeStr.resize(len);
// }
inline string PathJoin(const string& path1, const string& path2) {
if(EndsWith(path1, "/")) {
return path1 + path2;
}
return path1 + "/" + path2;
}
}
#endif
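A quick usage sketch for a few of the string helpers above (assuming the header installs as "StringUtil.hpp"; everything here is in namespace limonp):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
#include "StringUtil.hpp"

int main() {
    using namespace limonp;

    // Split on a set of separator characters, then rejoin.
    std::vector<std::string> parts = Split("a,b,,c", ",");           // {"a", "b", "", "c"}
    std::cout << Join(parts.begin(), parts.end(), "|") << std::endl; // a|b||c

    // Trim works in place and returns the same string.
    std::string padded = "  hello  ";
    std::cout << "[" << Trim(padded) << "]" << std::endl;            // [hello]

    // UTF-8 -> code points -> UTF-8 round trip.
    std::vector<uint32_t> codepoints;
    if (Utf8ToUnicode32("汉字abc", codepoints)) {
        std::string back;
        Unicode32ToUtf8(codepoints.begin(), codepoints.end(), back);
        std::cout << back << std::endl;                              // 汉字abc
    }
    return 0;
}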

View File

@ -1,44 +0,0 @@
#ifndef LIMONP_THREAD_HPP
#define LIMONP_THREAD_HPP
#include "Logging.hpp"
#include "NonCopyable.hpp"
namespace limonp {
class IThread: NonCopyable {
public:
IThread(): isStarted(false), isJoined(false) {
}
virtual ~IThread() {
if(isStarted && !isJoined) {
XCHECK(!pthread_detach(thread_));
}
};
virtual void Run() = 0;
void Start() {
XCHECK(!isStarted);
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
isStarted = true;
}
void Join() {
XCHECK(!isJoined);
XCHECK(!pthread_join(thread_, NULL));
isJoined = true;
}
private:
static void * Worker(void * data) {
IThread * ptr = (IThread* ) data;
ptr->Run();
return NULL;
}
pthread_t thread_;
bool isStarted;
bool isJoined;
}; // class IThread
} // namespace limonp
#endif // LIMONP_THREAD_HPP
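IThread is an abstract base: derive from it, implement Run(), then call Start() and Join(). A minimal sketch (assuming Logging.hpp provides XCHECK; link with -lpthread):

#include <iostream>
#include "Thread.hpp"

class HelloThread : public limonp::IThread {
public:
    // Executed on the worker pthread via the static Worker() trampoline.
    virtual void Run() {
        std::cout << "hello from a worker thread" << std::endl;
    }
};

int main() {
    HelloThread t;
    t.Start();   // pthread_create
    t.Join();    // pthread_join; without this the destructor detaches the thread
    return 0;
}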

View File

@ -1,86 +0,0 @@
#ifndef LIMONP_THREAD_POOL_HPP
#define LIMONP_THREAD_POOL_HPP
#include "Thread.hpp"
#include "BlockingQueue.hpp"
#include "BoundedBlockingQueue.hpp"
#include "Closure.hpp"
namespace limonp {
using namespace std;
//class ThreadPool;
class ThreadPool: NonCopyable {
public:
class Worker: public IThread {
public:
Worker(ThreadPool* pool): ptThreadPool_(pool) {
assert(ptThreadPool_);
}
virtual ~Worker() {
}
virtual void Run() {
while (true) {
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
if (closure == NULL) {
break;
}
try {
closure->Run();
} catch(std::exception& e) {
XLOG(ERROR) << e.what();
} catch(...) {
XLOG(ERROR) << " unknown exception.";
}
delete closure;
}
}
private:
ThreadPool * ptThreadPool_;
}; // class Worker
ThreadPool(size_t thread_num)
: threads_(thread_num),
queue_(thread_num) {
assert(thread_num);
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i] = new Worker(this);
}
}
~ThreadPool() {
Stop();
}
void Start() {
for(size_t i = 0; i < threads_.size(); i++) {
threads_[i]->Start();
}
}
void Stop() {
for(size_t i = 0; i < threads_.size(); i ++) {
queue_.Push(NULL);
}
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i]->Join();
delete threads_[i];
}
threads_.clear();
}
void Add(ClosureInterface* task) {
assert(task);
queue_.Push(task);
}
private:
friend class Worker;
vector<IThread*> threads_;
BoundedBlockingQueue<ClosureInterface*> queue_;
}; // class ThreadPool
} // namespace limonp
#endif // LIMONP_THREAD_POOL_HPP
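Tasks handed to ThreadPool::Add() are owned by the pool and deleted after they run. Closure.hpp is not shown in this diff, so the sketch below simply assumes ClosureInterface declares a virtual destructor and a pure virtual Run(), which is all Worker::Run() above requires:

#include <iostream>
#include "ThreadPool.hpp"

class PrintTask : public limonp::ClosureInterface {
public:
    explicit PrintTask(int id) : id_(id) {}
    virtual void Run() {
        std::cout << "running task " << id_ << std::endl;
    }
private:
    int id_;
};

int main() {
    limonp::ThreadPool pool(4);      // 4 workers, bounded task queue of the same size
    pool.Start();
    for (int i = 0; i < 8; ++i) {
        pool.Add(new PrintTask(i));  // deleted by the worker once it has run
    }
    pool.Stop();                     // pushes NULL sentinels, joins and deletes workers
    return 0;
}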

View File

@ -1,22 +0,0 @@
INCLUDEPATH += $$PWD
HEADERS += \
$$PWD/ArgvContext.hpp \
$$PWD/BlockingQueue.hpp \
$$PWD/BoundedBlockingQueue.hpp \
$$PWD/BoundedQueue.hpp \
$$PWD/Closure.hpp \
$$PWD/Colors.hpp \
$$PWD/Condition.hpp \
$$PWD/Config.hpp \
$$PWD/FileLock.hpp \
$$PWD/ForcePublic.hpp \
$$PWD/LocalVector.hpp \
$$PWD/Logging.hpp \
$$PWD/Md5.hpp \
$$PWD/MutexLock.hpp \
$$PWD/NonCopyable.hpp \
$$PWD/StdExtension.hpp \
$$PWD/StringUtil.hpp \
$$PWD/Thread.hpp \
$$PWD/ThreadPool.hpp

View File

@ -1 +0,0 @@
#include "chinese-segmentation.h"

View File

@ -1 +0,0 @@
#include "hanzi-to-pinyin.h"

View File

@ -1,31 +0,0 @@
# CppJieba dictionaries
The file name suffix indicates the dictionary's encoding.
For example, filename.utf8 is UTF-8 encoded and filename.gbk is GBK encoded.
## Word segmentation
### jieba.dict.utf8/gbk
The dictionary used by the max-probability segmenter (MPSegment: Max Probability).
### hmm_model.utf8/gbk
The model used by the hidden Markov segmenter (HMMSegment: Hidden Markov Model).
__MixSegment (a mix of MPSegment and HMMSegment) uses both of the dictionaries above.__
## Keyword extraction
### idf.utf8
IDF (Inverse Document Frequency)
KeywordExtractor uses the classic TF-IDF algorithm, so this dictionary is needed to supply IDF values.
### stop_words.utf8
The stop-word dictionary.
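For reference, the weight the extractor assigns to a candidate word w is the textbook TF-IDF product (the exact normalization lives in KeywordExtractor.hpp and may differ in detail): $\mathrm{weight}(w) = \mathrm{tf}(w) \cdot \mathrm{idf}(w)$, where $\mathrm{tf}(w)$ is the word's relative frequency in the input text and $\mathrm{idf}(w)$ is looked up from idf.utf8, with words missing from the table typically falling back to a default IDF. Words listed in stop_words.utf8 are skipped as candidates.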

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

View File

@ -1,259 +0,0 @@
# Initial state probabilities
# Format:
# state:probability
B,a:-4.7623052146
B,ad:-6.68006603678
B,ag:-3.14e+100
B,an:-8.69708322302
B,b:-5.01837436211
B,bg:-3.14e+100
B,c:-3.42388018495
B,d:-3.97504752976
B,df:-8.88897423083
B,dg:-3.14e+100
B,e:-8.56355183039
B,en:-3.14e+100
B,f:-5.49163041848
B,g:-3.14e+100
B,h:-13.53336513
B,i:-6.11578472756
B,in:-3.14e+100
B,j:-5.05761912847
B,jn:-3.14e+100
B,k:-3.14e+100
B,l:-4.90588358466
B,ln:-3.14e+100
B,m:-3.6524299819
B,mg:-3.14e+100
B,mq:-6.7869530014
B,n:-1.69662577975
B,ng:-3.14e+100
B,nr:-2.23104959138
B,nrfg:-5.87372217541
B,nrt:-4.98564273352
B,ns:-2.8228438315
B,nt:-4.84609166818
B,nz:-3.94698846058
B,o:-8.43349870215
B,p:-4.20098413209
B,q:-6.99812385896
B,qe:-3.14e+100
B,qg:-3.14e+100
B,r:-3.40981877908
B,rg:-3.14e+100
B,rr:-12.4347528413
B,rz:-7.94611647157
B,s:-5.52267359084
B,t:-3.36474790945
B,tg:-3.14e+100
B,u:-9.1639172775
B,ud:-3.14e+100
B,ug:-3.14e+100
B,uj:-3.14e+100
B,ul:-3.14e+100
B,uv:-3.14e+100
B,uz:-3.14e+100
B,v:-2.67405848743
B,vd:-9.04472876024
B,vg:-3.14e+100
B,vi:-12.4347528413
B,vn:-4.33156108902
B,vq:-12.1470707689
B,w:-3.14e+100
B,x:-3.14e+100
B,y:-9.84448567586
B,yg:-3.14e+100
B,z:-7.04568111149
B,zg:-3.14e+100
E,a:-3.14e+100
E,ad:-3.14e+100
E,ag:-3.14e+100
E,an:-3.14e+100
E,b:-3.14e+100
E,bg:-3.14e+100
E,c:-3.14e+100
E,d:-3.14e+100
E,df:-3.14e+100
E,dg:-3.14e+100
E,e:-3.14e+100
E,en:-3.14e+100
E,f:-3.14e+100
E,g:-3.14e+100
E,h:-3.14e+100
E,i:-3.14e+100
E,in:-3.14e+100
E,j:-3.14e+100
E,jn:-3.14e+100
E,k:-3.14e+100
E,l:-3.14e+100
E,ln:-3.14e+100
E,m:-3.14e+100
E,mg:-3.14e+100
E,mq:-3.14e+100
E,n:-3.14e+100
E,ng:-3.14e+100
E,nr:-3.14e+100
E,nrfg:-3.14e+100
E,nrt:-3.14e+100
E,ns:-3.14e+100
E,nt:-3.14e+100
E,nz:-3.14e+100
E,o:-3.14e+100
E,p:-3.14e+100
E,q:-3.14e+100
E,qe:-3.14e+100
E,qg:-3.14e+100
E,r:-3.14e+100
E,rg:-3.14e+100
E,rr:-3.14e+100
E,rz:-3.14e+100
E,s:-3.14e+100
E,t:-3.14e+100
E,tg:-3.14e+100
E,u:-3.14e+100
E,ud:-3.14e+100
E,ug:-3.14e+100
E,uj:-3.14e+100
E,ul:-3.14e+100
E,uv:-3.14e+100
E,uz:-3.14e+100
E,v:-3.14e+100
E,vd:-3.14e+100
E,vg:-3.14e+100
E,vi:-3.14e+100
E,vn:-3.14e+100
E,vq:-3.14e+100
E,w:-3.14e+100
E,x:-3.14e+100
E,y:-3.14e+100
E,yg:-3.14e+100
E,z:-3.14e+100
E,zg:-3.14e+100
M,a:-3.14e+100
M,ad:-3.14e+100
M,ag:-3.14e+100
M,an:-3.14e+100
M,b:-3.14e+100
M,bg:-3.14e+100
M,c:-3.14e+100
M,d:-3.14e+100
M,df:-3.14e+100
M,dg:-3.14e+100
M,e:-3.14e+100
M,en:-3.14e+100
M,f:-3.14e+100
M,g:-3.14e+100
M,h:-3.14e+100
M,i:-3.14e+100
M,in:-3.14e+100
M,j:-3.14e+100
M,jn:-3.14e+100
M,k:-3.14e+100
M,l:-3.14e+100
M,ln:-3.14e+100
M,m:-3.14e+100
M,mg:-3.14e+100
M,mq:-3.14e+100
M,n:-3.14e+100
M,ng:-3.14e+100
M,nr:-3.14e+100
M,nrfg:-3.14e+100
M,nrt:-3.14e+100
M,ns:-3.14e+100
M,nt:-3.14e+100
M,nz:-3.14e+100
M,o:-3.14e+100
M,p:-3.14e+100
M,q:-3.14e+100
M,qe:-3.14e+100
M,qg:-3.14e+100
M,r:-3.14e+100
M,rg:-3.14e+100
M,rr:-3.14e+100
M,rz:-3.14e+100
M,s:-3.14e+100
M,t:-3.14e+100
M,tg:-3.14e+100
M,u:-3.14e+100
M,ud:-3.14e+100
M,ug:-3.14e+100
M,uj:-3.14e+100
M,ul:-3.14e+100
M,uv:-3.14e+100
M,uz:-3.14e+100
M,v:-3.14e+100
M,vd:-3.14e+100
M,vg:-3.14e+100
M,vi:-3.14e+100
M,vn:-3.14e+100
M,vq:-3.14e+100
M,w:-3.14e+100
M,x:-3.14e+100
M,y:-3.14e+100
M,yg:-3.14e+100
M,z:-3.14e+100
M,zg:-3.14e+100
S,a:-3.90253968313
S,ad:-11.0484584802
S,ag:-6.95411391796
S,an:-12.8402179494
S,b:-6.47288876397
S,bg:-3.14e+100
S,c:-4.78696679586
S,d:-3.90391976418
S,df:-3.14e+100
S,dg:-8.9483976513
S,e:-5.94251300628
S,en:-3.14e+100
S,f:-5.19482024998
S,g:-6.50782681533
S,h:-8.65056320738
S,i:-3.14e+100
S,in:-3.14e+100
S,j:-4.91199211964
S,jn:-3.14e+100
S,k:-6.94032059583
S,l:-3.14e+100
S,ln:-3.14e+100
S,m:-3.26920065212
S,mg:-10.8253149289
S,mq:-3.14e+100
S,n:-3.85514838976
S,ng:-4.9134348611
S,nr:-4.48366310396
S,nrfg:-3.14e+100
S,nrt:-3.14e+100
S,ns:-3.14e+100
S,nt:-12.1470707689
S,nz:-3.14e+100
S,o:-8.46446092775
S,p:-2.98684018136
S,q:-4.88865861826
S,qe:-3.14e+100
S,qg:-3.14e+100
S,r:-2.76353367841
S,rg:-10.2752685919
S,rr:-3.14e+100
S,rz:-3.14e+100
S,s:-3.14e+100
S,t:-3.14e+100
S,tg:-6.27284253188
S,u:-6.94032059583
S,ud:-7.72823016105
S,ug:-7.53940370266
S,uj:-6.85251045118
S,ul:-8.41537131755
S,uv:-8.15808672229
S,uz:-9.29925862537
S,v:-3.05329230341
S,vd:-3.14e+100
S,vg:-5.94301818437
S,vi:-3.14e+100
S,vn:-11.4539235883
S,vq:-3.14e+100
S,w:-3.14e+100
S,x:-8.42741965607
S,y:-6.19707946995
S,yg:-13.53336513
S,z:-3.14e+100
S,zg:-3.14e+100
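A reading note (not stated in the file itself, so treat it as an assumption): each line is a natural-log start probability for an HMM state, where B/M/E/S mark a character's position in a word (Begin/Middle/End/Single) and the suffix is the POS tag, i.e. roughly $\ln P(\text{first state} = (B, nr)) \approx -2.23$ for "B,nr:-2.23104959138". The sentinel $-3.14\times 10^{100}$ stands in for $\ln 0$, a start state the model treats as impossible; this is why every E and M entry uses it, since a word can never begin in its end or middle position.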

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,4 +0,0 @@
云计算
韩玉鉴赏
蓝翔 nz
区块链 10 nz

View File

@ -1,29 +0,0 @@
#ifndef HANZITOPINYINPRIVATE_H
#define HANZITOPINYINPRIVATE_H
#include <QtCore/qglobal.h>
#include "cppjieba/PinYinTrie.hpp"
#include "hanzi-to-pinyin.h"
#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
using namespace std;
class PINYINMANAGER_EXPORT HanZiToPinYinPrivate
{
public:
HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr);
~HanZiToPinYinPrivate();
public:
template <typename T>
bool isMultiTone(T &&t) {return m_pinYinTrie->isMultiTone(std::forward<T>(t));}
bool contains(string &word);
int getResults(string word, QStringList &results);
private:
cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
HanZiToPinYin *q = nullptr;
};
#endif // HANZITOPINYINPRIVATE_H

View File

@ -1,83 +0,0 @@
#include "hanzi-to-pinyin.h"
#include "hanzi-to-pinyin-private.h"
#include <mutex>
HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr;
std::once_flag g_singleFlag;
bool HanZiToPinYinPrivate::contains(string &word)
{
return m_pinYinTrie->contains(word);
}
int HanZiToPinYinPrivate::getResults(string word, QStringList &results)
{
results.clear();
if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
return 0;
}
QString tmp;
if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
results.append(tmp);
return 0;
}
return -1;
}
HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent)
{
const char * const PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
}
HanZiToPinYinPrivate::~HanZiToPinYinPrivate()
{
if (m_pinYinTrie){
delete m_pinYinTrie;
m_pinYinTrie = nullptr;
}
}
HanZiToPinYin * HanZiToPinYin::getInstance()
{
call_once(g_singleFlag, []() {
g_pinYinManager = new HanZiToPinYin;
});
return g_pinYinManager;
}
bool HanZiToPinYin::contains(string &word)
{
return d->contains(word);
}
bool HanZiToPinYin::isMultiTone(string &word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(string &&word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(const string &word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(const string &&word)
{
return d->isMultiTone(word);
}
int HanZiToPinYin::getResults(string word, QStringList &results)
{
return d->getResults(word, results);
}
HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate)
{
}

View File

@ -1,53 +0,0 @@
#ifndef HANZITOPINYIN_H
#define HANZITOPINYIN_H
#include <QtCore/qglobal.h>
//#include "cppjieba/PinYinTrie.hpp"
#include <QStringList>
#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
using namespace std;
class HanZiToPinYinPrivate;
class PINYINMANAGER_EXPORT HanZiToPinYin
{
public:
static HanZiToPinYin * getInstance();
public:
/**
* @brief HanZiToPinYin::isMultiTone checks whether a character has multiple pinyin readings
* @param word the character (UTF-8 string) to check
* @return bool true if the character is polyphonic, false otherwise
*/
bool isMultiTone(string &word);
bool isMultiTone(string &&word);
bool isMultiTone(const string &word);
bool isMultiTone(const string &&word);
/**
* @brief HanZiToPinYin::contains checks whether the pinyin dictionary contains the word
* @param word the word to look up
* @return bool true if the word is in the dictionary, false otherwise
*/
bool contains(string &word);
/**
* @brief HanZiToPinYin::getResults gets the pinyin readings of a word
* @param word the word to convert
* @param results the pinyin list for word; it is cleared before being filled
* @return int 0 on success, -1 on failure
*/
int getResults(string word, QStringList &results);
protected:
HanZiToPinYin();
~HanZiToPinYin();
HanZiToPinYin(const HanZiToPinYin&) = delete;
HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
private:
static HanZiToPinYin *g_pinYinManager;
HanZiToPinYinPrivate *d = nullptr;
};
#endif // HANZITOPINYIN_H
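A minimal usage sketch for this singleton API (assuming the header installs as "hanzi-to-pinyin.h", Qt Core is available, and the pinyin dictionary under /usr/share/ukui-search/res/dict/ is installed at runtime):

#include <iostream>
#include <string>
#include <QStringList>
#include "hanzi-to-pinyin.h"

int main() {
    HanZiToPinYin *converter = HanZiToPinYin::getInstance();

    std::string word = "重";          // a typical polyphonic character
    QStringList results;
    if (converter->getResults(word, results) == 0) {
        for (const QString &pinyin : results) {
            std::cout << pinyin.toStdString() << " ";  // e.g. "zhong chong" (tones are not included)
        }
        std::cout << std::endl;
    }
    std::cout << std::boolalpha << converter->isMultiTone(word) << std::endl;
    return 0;
}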

View File

@ -1,84 +0,0 @@
QT -= gui
VERSION = 1.0.0
TARGET = chinese-segmentation
TEMPLATE = lib
DEFINES += LIBCHINESESEGMENTATION_LIBRARY
CONFIG += c++11 create_pc create_prl no_install_prl
# The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
#QMAKE_CXXFLAGS += -Werror=uninitialized
QMAKE_CXXFLAGS += -execution-charset:utf-8
# You can also make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(cppjieba/cppjieba.pri)
#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
SOURCES += \
chinese-segmentation.cpp \
hanzi-to-pinyin.cpp
HEADERS += \
chinese-segmentation-private.h \
chinese-segmentation.h \
common-struct.h \
hanzi-to-pinyin-private.h \
hanzi-to-pinyin.h \
libchinese-segmentation_global.h
dict_files.path = /usr/share/ukui-search/res/dict/
dict_files.files = $$PWD/dict/*.utf8
dict_files.files += $$PWD/dict/pos_dict/*.utf8
dict_files.files += $$PWD/dict/*.txt
INSTALLS += \
dict_files \
# Default rules for deployment.
unix {
target.path = $$[QT_INSTALL_LIBS]
QMAKE_PKGCONFIG_NAME = chinese-segmentation
QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
QMAKE_PKGCONFIG_VERSION = $$VERSION
QMAKE_PKGCONFIG_LIBDIR = $$target.path
QMAKE_PKGCONFIG_DESTDIR = pkgconfig
QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
!isEmpty(target.path): INSTALLS += target
header.path = /usr/include/chinese-seg
header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h
header.files += development-files/header-files/*
# headercppjieba.path = /usr/include/chinese-seg/cppjieba/
# headercppjieba.files = cppjieba/*
INSTALLS += header
}
#DISTFILES += \
# jiaba/jieba.pri
DISTFILES += \
dict/README.md \
dict/hmm_model.utf8 \
dict/idf.utf8 \
dict/jieba.dict.utf8 \
dict/pos_dict/char_state_tab.utf8 \
dict/pos_dict/prob_emit.utf8 \
dict/pos_dict/prob_start.utf8 \
dict/pos_dict/prob_trans.utf8 \
dict/stop_words.utf8 \
dict/user.dict.utf8 \
dict/pinyinWithoutTone.txt \
development-files/header-files/* \

View File

@ -1,32 +0,0 @@
/*
* Copyright (C) 2020, KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* Authors: zhangzihao <zhangzihao@kylinos.cn>
* Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#ifndef CHINESESEGMENTATION_GLOBAL_H
#define CHINESESEGMENTATION_GLOBAL_H
#include <QtCore/qglobal.h>
#if defined(CHINESESEGMENTATION_LIBRARY)
# define CHINESESEGMENTATION_EXPORT Q_DECL_EXPORT
#else
# define CHINESESEGMENTATION_EXPORT Q_DECL_IMPORT
#endif
#endif // CHINESESEGMENTATION_GLOBAL_H