Add chinese-segmentation submodule.

2022-06-20 14:12:31 +08:00 · 2022-06-20 14:12:31 +08:00 · 8d40e8a7b2
parent 1512502cea
commit 8d40e8a7b2
64 changed files with 4 additions and 670851 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +1,6 @@
 [submodule "debian"]
 	path = debian
 	url = http://gitlab2.kylin.com/kylin-desktop/ukui-search-debian.git
 [submodule "libchinese-segmentation"]
 	path = libchinese-segmentation
 	url = http://gitlab2.kylin.com/iaom/chinese-segmentation.git
--- a/1
+++ b/1
@ -0,0 +1 @@
 Subproject commit 02216728e0cf1f1304e97f7fc1f7b56f4ddc5872
--- a/libchinese-segmentation/chinese-segmentation-private.h
+++ b/libchinese-segmentation/chinese-segmentation-private.h
@ -1,33 +0,0 @@
 #ifndef CHINESESEGMENTATIONPRIVATE_H
 #define CHINESESEGMENTATIONPRIVATE_H
 #include "chinese-segmentation.h"
 #include "cppjieba/Jieba.hpp"
 #include "cppjieba/KeywordExtractor.hpp"
 /**
  * Private implementation object behind ChineseSegmentation.
  *
  * Owns the cppjieba::Jieba engine (allocated in the constructor, deleted in
  * the destructor) and forwards every segmentation request to it.
  */
 class ChineseSegmentationPrivate
 {
 public:
    explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
    ~ChineseSegmentationPrivate();
    // The destructor deletes m_jieba, so a copied object would delete the
    // same engine twice. Copying is therefore disabled.
    ChineseSegmentationPrivate(const ChineseSegmentationPrivate &) = delete;
    ChineseSegmentationPrivate &operator=(const ChineseSegmentationPrivate &) = delete;
    // Keyword extraction through the engine's extractor.
    vector<KeyWord> callSegment(const string& sentence);
    // Jieba::Cut, returning only the word strings.
    vector<string> callMixSegmentCutStr(const string& sentence);
    // Jieba::Cut, returning Word records.
    vector<Word> callMixSegmentCutWord(const string& sentence);
    // Jieba::LookupTag for a single word.
    string lookUpTagOfWord(const string& word);
    // Jieba::Tag: (word, tag) pairs for the sentence.
    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
    // Jieba::CutAll.
    vector<Word> callFullSegment(const string& sentence);
    // Jieba::CutForSearch.
    vector<Word> callQuerySegment(const string& sentence);
    // Jieba::CutHMM.
    vector<Word> callHMMSegment(const string& sentence);
    // Jieba::CutSmall.
    vector<Word> callMPSegment(const string& sentence);
 private:
    cppjieba::Jieba *m_jieba = nullptr; // owned; created in the constructor
    ChineseSegmentation *q = nullptr;   // back-pointer to the public object (not owned)
 };
 #endif // CHINESESEGMENTATIONPRIVATE_H
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@ -1,162 +0,0 @@
 /*
 * Copyright (C) 2020, KylinSoft Co., Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Authors: zhangzihao <zhangzihao@kylinos.cn>
 * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
 *
 */
 #include "chinese-segmentation.h"
 #include "chinese-segmentation-private.h"
 /**
  * Stores the back-pointer to the public object and creates the Jieba engine
  * from the dictionary files installed under /usr/share/ukui-search/res/dict.
  */
 ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
 {
    const char * const mainDict  = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
    const char * const hmmModel  = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
    const char * const userDict  = "/usr/share/ukui-search/res/dict/user.dict.utf8";
    const char * const idfDict   = "/usr/share/ukui-search/res/dict/idf.utf8";
    const char * const stopWords = "/usr/share/ukui-search/res/dict/stop_words.utf8";
    // The sixth argument is passed as an empty string, exactly as before.
    m_jieba = new cppjieba::Jieba(mainDict, hmmModel, userDict, idfDict, stopWords, "");
 }
 /** Releases the Jieba engine created in the constructor. */
 ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
    // Deleting a null pointer is a no-op, so no explicit guard is required.
    delete m_jieba;
    m_jieba = nullptr;
 }
 /**
  * Runs the engine's keyword extractor over sentence.
  * @return the keywords written by Extract.
  */
 vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
    vector<KeyWord> extracted;
    // -1 converted to size_t is the largest representable count
    // (presumably "no limit" for the extractor - confirm against Extract).
    const size_t keywordLimit = static_cast<size_t>(-1);
    m_jieba->extractor.Extract(sentence, extracted, keywordLimit);
    return extracted;
 }
 /** Segments sentence with Jieba::Cut and returns the resulting word strings. */
 vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
 {
    vector<string> pieces;
    m_jieba->Cut(sentence, pieces);
    return pieces;
 }
 /** Segments sentence with Jieba::Cut and returns the resulting Word records. */
 vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
 {
    vector<Word> pieces;
    m_jieba->Cut(sentence, pieces);
    return pieces;
 }
 /** Returns whatever Jieba::LookupTag reports for word. */
 string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
 {
    string tag = m_jieba->LookupTag(word);
    return tag;
 }
 /** Runs Jieba::Tag over sentence and returns the (word, tag) pairs it produces. */
 vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
 {
    vector<pair<string, string>> taggedWords;
    m_jieba->Tag(sentence, taggedWords);
    return taggedWords;
 }
 /** Segments sentence with Jieba::CutAll and returns the resulting Word records. */
 vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
 {
    vector<Word> pieces;
    m_jieba->CutAll(sentence, pieces);
    return pieces;
 }
 /** Segments sentence with Jieba::CutForSearch and returns the resulting Word records. */
 vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
 {
    vector<Word> pieces;
    m_jieba->CutForSearch(sentence, pieces);
    return pieces;
 }
 /** Segments sentence with Jieba::CutHMM and returns the resulting Word records. */
 vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
 {
    vector<Word> pieces;
    m_jieba->CutHMM(sentence, pieces);
    return pieces;
 }
 /**
  * Segments sentence with Jieba::CutSmall, passing 512 as the maximum word
  * length, and returns the resulting Word records.
  */
 vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
 {
    vector<Word> pieces;
    size_t wordLengthCap = 512;
    m_jieba->CutSmall(sentence, pieces, wordLengthCap);
    return pieces;
 }
 /**
  * Returns the process-wide ChineseSegmentation object.
  * The object is heap-allocated the first time this function runs and is
  * never deleted in this file.
  */
 ChineseSegmentation *ChineseSegmentation::getInstance()
 {
    static ChineseSegmentation * const sharedInstance = new ChineseSegmentation;
    return sharedInstance;
 }
 // Keyword extraction; forwards to the private implementation object.
 vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
 {
    return d->callSegment(sentence);
 }
 // Mix segmentation returning word strings; forwards to the private implementation object.
 vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
 {
    return d->callMixSegmentCutStr(sentence);
 }
 // Mix segmentation returning Word records; forwards to the private implementation object.
 vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
 {
    return d->callMixSegmentCutWord(str);
 }
 // Tag lookup for a single word; forwards to the private implementation object.
 string ChineseSegmentation::lookUpTagOfWord(const string &word)
 {
    return d->lookUpTagOfWord(word);
 }
 // (word, tag) pairs for a sentence; forwards to the private implementation object.
 vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
 {
    return d->getTagOfWordsInSentence(sentence);
 }
 // Full segmentation; forwards to the private implementation object.
 vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
 {
    return d->callFullSegment(sentence);
 }
 // Query (search-oriented) segmentation; forwards to the private implementation object.
 vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
 {
    return d->callQuerySegment(sentence);
 }
 // HMM segmentation; forwards to the private implementation object.
 vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
 {
    return d->callHMMSegment(sentence);
 }
 // MP segmentation; forwards to the private implementation object.
 vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
 {
    return d->callMPSegment(sentence);
 }
 // Allocates the private implementation object. It is constructed with its
 // default argument, so its back-pointer to this object stays null.
 ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
 {
 }
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@ -1,116 +0,0 @@
 /*
 * Copyright (C) 2020, KylinSoft Co., Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Authors: zhangzihao <zhangzihao@kylinos.cn>
 * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
 *
 */
 #ifndef CHINESESEGMENTATION_H
 #define CHINESESEGMENTATION_H
 #include "libchinese-segmentation_global.h"
 #include "common-struct.h"
 class ChineseSegmentationPrivate;
 /**
  * Public entry point of the Chinese segmentation library.
  * Obtain the shared object through getInstance(); the constructor,
  * destructor and copy operations are private or deleted.
  */
 class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
 public:
    /** Returns the shared ChineseSegmentation object. */
    static ChineseSegmentation *getInstance();
    /**
     * @brief ChineseSegmentation::callSegment
     * Extracts keywords with the extractor: the sentence is first segmented with the Mix method,
     * then keywords are selected using the IDF dictionary. Only keywords of two or more characters are included.
     *
     * @param sentence the sentence to extract keywords from
     * @return vector<KeyWord> container holding the information of the extracted keywords
     */
    vector<KeyWord> callSegment(const string &sentence);
    /**
     * @brief ChineseSegmentation::callMixSegmentCutStr
     * Segments with the Mix method: the maximum-probability method (MP) produces an initial segmentation,
     * then the hidden Markov model (HMM) refines it. Both dictionary words and out-of-vocabulary words
     * are cut accurately, so the result is fairly precise.
     *
     * @param sentence the sentence to segment
     * @return vector<string> container holding only the text of each resulting word
     */
    vector<string> callMixSegmentCutStr(const string& sentence);
    /**
     * @brief ChineseSegmentation::callMixSegmentCutWord
     * Same functionality as callMixSegmentCutStr.
     * @param str the sentence to segment
     * @return vector<Word> container holding the full information of each resulting word
     */
    vector<Word> callMixSegmentCutWord(const string& str);
    /**
     * @brief ChineseSegmentation::lookUpTagOfWord
     * Looks up the part of speech of a word.
     * @param word the word whose part of speech is queried
     * @return string the part of speech of word
     */
    string lookUpTagOfWord(const string& word);
    /**
     * @brief ChineseSegmentation::getTagOfWordsInSentence
     * Segments with the Mix method, then obtains the part of speech of every word.
     * @param sentence the sentence to segment
     * @return vector<pair<string, string>> the text of each resulting word (first) and its part of speech (second)
     */
    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
    /**
     * @brief ChineseSegmentation::callFullSegment
     * Segments with the Full method, which cuts out every word found in the dictionary.
     * @param sentence the sentence to segment
     * @return vector<Word> container holding the full information of each resulting word
     */
    vector<Word> callFullSegment(const string& sentence);
    /**
     * @brief ChineseSegmentation::callQuerySegment
     * Segments with the Query method: Mix is applied first, then Full is applied to long words.
     * The result is the most precise, but it also yields the largest number of words.
     * @param sentence the sentence to segment
     * @return vector<Word> container holding the full information of each resulting word
     */
    vector<Word> callQuerySegment(const string& sentence);
    /**
     * @brief ChineseSegmentation::callHMMSegment
     * Segments with the hidden Markov model (HMM).
     * @param sentence the sentence to segment
     * @return vector<Word> container holding the full information of each resulting word
     */
    vector<Word> callHMMSegment(const string& sentence);
    /**
     * @brief ChineseSegmentation::callMPSegment
     * Segments with the maximum-probability method (MP).
     * @param sentence the sentence to segment
     * @return vector<Word> container holding the full information of each resulting word
     */
    vector<Word> callMPSegment(const string& sentence);
 private:
    // Construction and destruction are private; copying is disabled.
    explicit ChineseSegmentation();
    ~ChineseSegmentation() = default;
    ChineseSegmentation(const ChineseSegmentation&) = delete;
    ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
 private:
    // Pointer to the private implementation object; the defaulted destructor does not delete it.
    ChineseSegmentationPrivate *d = nullptr;
 };
 #endif // CHINESESEGMENTATION_H
--- a/libchinese-segmentation/common-struct.h
+++ b/libchinese-segmentation/common-struct.h
@ -1,52 +0,0 @@
 #ifndef COMMONSTRUCT_H
 #define COMMONSTRUCT_H
 #include <string>
 #include <vector>
 using namespace std;
 /**
 * @brief The KeyWord struct
 *
 * @property word the content of keyword
 * @property offsets the Unicode offsets, can be used to check the word pos in a sentence
 * @property weight the weight of the keyword
 */
 struct KeyWord {
    string word;            // keyword text
    vector<size_t> offsets; // Unicode offsets of the keyword in the sentence
    double weight;          // weight assigned to the keyword
    // Deliberately no user-declared destructor: the members release their own
    // storage, and declaring one would suppress the implicit move operations,
    // turning every move of a KeyWord (e.g. on vector growth) into a deep copy.
 };
 /**
 * @brief The Word struct
 *
 * @property word the content of word
 * @property offset the offset of the word(absolute pos, Chinese 3 , English 1)， can be used to check the word pos in a sentence
 * @property unicode_offset the Unicode offset of the word
 * @property unicode_length the Unicode length of the word
 */
 struct Word {
    string word;             // word text
    uint32_t offset;         // offset of the word in the sentence
    uint32_t unicode_offset; // Unicode offset of the word
    uint32_t unicode_length; // Unicode length of the word
    // Builds a word that carries only a text and an offset; the Unicode
    // fields are zeroed so they never hold indeterminate values.
    Word(const string& w, uint32_t o)
        : word(w), offset(o), unicode_offset(0), unicode_length(0) {
    }
    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
    }
    // Deliberately no user-declared destructor: std::string cleans up after
    // itself, and declaring one would suppress the implicit move operations.
 }; // struct Word
 #endif // COMMONSTRUCT_H
--- a/libchinese-segmentation/cppjieba/DatTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DatTrie.hpp
@ -1,634 +0,0 @@
 #pragma once
 #include <stdint.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <QDebug>
 #include <algorithm>
 #include <utility>
 #include "limonp/Md5.hpp"
 #include "Unicode.hpp"
 #include "darts.h"
 namespace cppjieba {
 using std::pair;
 // Dictionary entry (word, tag, weight) used as input when building a trie cache.
 struct DatElement {
    string word;
    string tag;
    double weight = 0;
    // Ascending by word; entries with the same word are ordered by descending weight.
    bool operator < (const DatElement & b) const {
        return (word != b.word) ? (word < b.word) : (weight > b.weight);
    }
 };
 // IDF dictionary entry (word, idf) used as input when building a trie cache.
 struct IdfElement {
    string word;
    double idf = 0;
    // Ascending by word; entries with the same word are ordered by descending idf.
    bool operator < (const IdfElement & b) const {
        return (word != b.word) ? (word < b.word) : (idf > b.idf);
    }
 };
 // Pinyin dictionary entry (word, tag) used as input when building a trie cache.
 struct PinYinElement
 {
    string word;
    string tag;
    // Ascending by word. The right-hand operand is a PinYinElement so that the
    // type can be compared with itself (and therefore sorted).
    bool operator < (const PinYinElement & b) const {
        return this->word < b.word;
    }
 };
 // Streams a DatElement as "word=<word>/tag=<tag>/weight=<weight>".
 inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
    os << "word=" << elem.word;
    os << "/tag=" << elem.tag;
    os << "/weight=" << elem.weight;
    return os;
 }
 // Fixed-size record stored in the cache file for every dictionary word.
 struct DatMemElem {
    double weight = 0.0;
    char tag[8] = {};
    // Stores str in the tag buffer, truncated so that it always stays NUL-terminated.
    void SetTag(const string & str) {
        const size_t copy_len = std::min(str.size(), sizeof(tag) - 1);
        memset(tag, 0, sizeof(tag));
        strncpy(tag, str.c_str(), copy_len);
    }
    // Returns the tag as a string, read up to the first NUL byte.
    string GetTag() const {
        return string(tag);
    }
 };
 // Fixed-size record stored in the cache file for every pinyin entry.
 struct PinYinMemElem {
    char tag[6] = {};
    // Stores str in the tag buffer, truncated so that it always stays NUL-terminated.
    void SetTag(const string & str) {
        const size_t copy_len = std::min(str.size(), sizeof(tag) - 1);
        memset(tag, 0, sizeof(tag));
        strncpy(tag, str.c_str(), copy_len);
    }
    // Returns the tag as a string, read up to the first NUL byte.
    string GetTag() const {
        return string(tag);
    }
 };
 // Streams a DatMemElem as "/tag=<tag>/weight=<weight>".
 inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
    os << "/tag=" << elem.GetTag();
    os << "/weight=" << elem.weight;
    return os;
 }
 // Per-rune node of the word graph filled in by DatTrie::Find.
 struct DatDag {
    // Outgoing edges as (index of the rune following the word, dictionary record).
    // Find always stores the single-rune edge at index 0; its record stays null
    // when the dictionary has no entry for that rune.
    limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
    // Not written by DatTrie::Find; presumably filled by the caller's path search - confirm.
    double max_weight;
    // Not written by DatTrie::Find; presumably filled by the caller's path search - confirm.
    int max_next;
 };
 // Double-array trie type from darts.h used for every lookup in this file.
 typedef Darts::DoubleArray JiebaDAT;
 // Header written at the very start of each cache file. It is followed by
 // elements_num fixed-size records and then by the serialized double array.
 struct CacheFileHeader {
    char md5_hex[32] = {};     // digest of the source dictionaries; compared byte-for-byte on attach
    double min_weight = 0;     // copy of DatTrie::min_weight_ at build time
    uint32_t elements_num = 0; // number of records stored after the header
    uint32_t dat_size = 0;     // double-array size in units (multiplied by unit_size() on attach)
 };
 // Compile-time checks on the record and header sizes used by the cache file layout.
 static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
 static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
 class DatTrie {
 public:
    DatTrie() {}
    /**
     * Releases the cache mapping and its file descriptor.
     * Only resources that were actually acquired are released, so destroying a
     * trie that was never attached (or whose mmap failed) no longer calls
     * munmap/close with invalid arguments.
     */
    ~DatTrie() {
        if ((nullptr != mmap_addr_) && (MAP_FAILED != mmap_addr_)) {
            ::munmap(mmap_addr_, mmap_length_);
        }
        mmap_addr_ = nullptr;
        mmap_length_ = 0;
        if (mmap_fd_ >= 0) {
            ::close(mmap_fd_);
        }
        mmap_fd_ = -1;
    }
    /**
     * Exact-match lookup of key in the word dictionary.
     * @return the record stored for key, or nullptr when there is no usable match.
     */
    const DatMemElem * Find(const string & key) const {
        JiebaDAT::result_pair_type hit;
        dat_.exactMatchSearch(key.c_str(), hit);
        const bool usable = (0 != hit.length)
                            && (hit.value >= 0)
                            && (static_cast<size_t>(hit.value) < elements_num_);
        return usable ? &elements_ptr_[ hit.value ] : nullptr;
    }
    const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
        JiebaDAT::result_pair_type find_result;
        dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
        if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
            return -1;
        }
        return idf_elements_ptr_[ find_result.value ];
    }
    /**
     * Exact-match lookup of key in the pinyin dictionary.
     * @return the record stored for key, or nullptr when there is no usable match.
     */
    const PinYinMemElem * PinYinFind(const string & key) const {
        JiebaDAT::result_pair_type hit;
        dat_.exactMatchSearch(key.c_str(), hit);
        const bool usable = (0 != hit.length)
                            && (hit.value >= 0)
                            && (static_cast<size_t>(hit.value) < elements_num_);
        return usable ? &pinyin_elements_ptr_[ hit.value ] : nullptr;
    }
    /**
     * Builds the word graph for the runes in [begin, end).
     * res is cleared and resized to one node per rune. Every node gets a
     * single-rune edge at index 0 (record null unless the dictionary holds that
     * rune) plus one edge per longer dictionary word starting at that rune and
     * spanning at most max_word_len runes.
     */
    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
              vector<struct DatDag>&res, size_t max_word_len) const {
        res.clear();
        res.resize(end - begin);
        string utf8_text;
        EncodeRunesToString(begin, end, utf8_text);
        static const size_t kMaxMatches = 128;
        JiebaDAT::result_pair_type matches[kMaxMatches] = {};
        const size_t rune_count = size_t(end - begin);
        size_t byte_pos = 0; // byte offset of the current rune inside utf8_text
        for (size_t rune_idx = 0; rune_idx < rune_count; ++rune_idx) {
            const std::size_t match_count = dat_.commonPrefixSearch(&utf8_text[byte_pos], &matches[0], kMaxMatches);
            auto & edges = res[rune_idx].nexts;
            edges.push_back(pair<size_t, const DatMemElem *>(rune_idx + 1, nullptr));
            for (std::size_t m = 0; m < match_count; ++m) {
                auto & hit = matches[m];
                const bool in_range = (hit.value >= 0) && ((size_t)hit.value < elements_num_);
                if (!in_range) {
                    continue;
                }
                auto const char_num = Utf8CharNum(&utf8_text[byte_pos], hit.length);
                if (char_num > max_word_len) {
                    continue;
                }
                auto record = &elements_ptr_[hit.value];
                if (1 == char_num) {
                    // A single-rune word fills in the edge that was pushed above.
                    edges[0].second = record;
                } else {
                    edges.push_back(pair<size_t, const DatMemElem *>(rune_idx + char_num, record));
                }
            }
            byte_pos += limonp::UnicodeToUtf8Bytes((begin + rune_idx)->rune);
        }
    }
    /*
    void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
              vector<struct DatDag>&res, size_t max_word_len) const {
        res.clear();
        res.resize(end - begin);
        string text_str;
        EncodeRunesToString(begin, end, text_str);
        static const size_t max_num = 128;
        JiebaDAT::result_pair_type result_pairs[max_num] = {};
        size_t str_size = end - begin;
        for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) {
            begin_pos -= (end - i - 1)->len;
            std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
            res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - i, nullptr));
            for (std::size_t idx = 0; idx < num_results; ++idx) {
                auto & match = result_pairs[idx];
                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                    continue;
                }
                auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
                if (char_num > max_word_len) {
                    continue;
                }
                auto pValue = &elements_ptr_[match.value];
                if (1 == char_num) {
                    res[str_size - i - 1].nexts[0].second = pValue;
                    continue;
                }
                res[str_size - i - 1].nexts.push_back(pair<size_t, const DatMemElem *>(str_size - 1 - i + char_num, pValue));
            }
        }
    }*/
    /**
     * Segments the runes in [begin, end) along the maximum-weight path and
     * appends the resulting word ranges to words (words is not cleared).
     *
     * Runs a backward dynamic programme: for each rune, from last to first, the
     * best total weight of a segmentation starting there is computed from the
     * dictionary words beginning at that rune. A rune with no usable dictionary
     * word becomes a single-rune word weighted with min_weight_.
     *
     * @param max_word_len dictionary words spanning more runes than this are ignored
     */
    void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
              vector<WordRange>& words, size_t max_word_len) const {
        const size_t str_size = end - begin;
        if (0 == str_size) {
            return;
        }
        string text_str;
        EncodeRunesToString(begin, end, text_str);
        static const size_t max_num = 128;
        static const double kUnreachable = -3.14e+100;
        JiebaDAT::result_pair_type result_pairs[max_num] = {}; // dictionary lookup results
        // Heap-backed tables instead of variable-length arrays: standard C++,
        // fully initialised, and safe for long inputs.
        vector<double> max_weight(str_size, kUnreachable); // best weight of the path starting at each rune
        // Index of the rune following the chosen word. 0 means "not decided yet";
        // a real successor is always greater than the rune's own index.
        vector<size_t> max_next(str_size, 0);
        size_t begin_pos = text_str.size();
        for (size_t cur = str_size; cur-- > 0;) { // walk backwards
            begin_pos -= (begin + cur)->len;
            const std::size_t num_found = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
            // Never read past the result buffer, whatever count the search reports.
            const std::size_t num_results = std::min<std::size_t>(num_found, max_num);
            for (std::size_t idx = 0; idx < num_results; ++idx) {
                const auto & match = result_pairs[idx];
                if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                    continue;
                }
                const size_t char_num = Utf8CharNum(&text_str[begin_pos], match.length);
                if ((0 == char_num) || (char_num > max_word_len)) {
                    continue;
                }
                const size_t next = cur + char_num;
                if (next > str_size) {
                    continue;
                }
                double val = elements_ptr_[match.value].weight;
                if (next < str_size) {
                    val += max_weight[next];
                }
                if (val > max_weight[cur]) {
                    max_weight[cur] = val;
                    max_next[cur] = next;
                }
            }
            if (0 == max_next[cur]) {
                // No usable dictionary word starts here (nothing found, or every
                // candidate was rejected): emit the rune as a word of its own.
                double val = min_weight_;
                if (cur + 1 < str_size) {
                    val += max_weight[cur + 1];
                }
                max_weight[cur] = val;
                max_next[cur] = cur + 1;
            }
        }
        for (size_t i = 0; i < str_size;) { // collect the chosen path
            const size_t next = max_next[i];
            assert(next > i);
            assert(next <= str_size);
            WordRange wr(begin + i, begin + next - 1);
            words.push_back(wr);
            i = next;
        }
    }
    // Returns the weight used for runes without a dictionary entry. It is set via
    // SetMinWeight or loaded from the cache header on attach.
    double GetMinWeight() const {
        return min_weight_;
    }
    // Sets the minimum weight; the value is written into the cache header by BuildDatCache.
    void SetMinWeight(double d) {
        min_weight_ = d ;
    }
    // Builds the word-dictionary cache file from elements, then attaches it.
    // Returns the result of the attach step.
    bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
        BuildDatCache(elements, dat_cache_file, md5);
        return InitAttachDat(dat_cache_file, md5);
    }
    // Builds the IDF cache file from elements, then attaches it.
    // Returns the result of the attach step.
    bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
        BuildDatCache(elements, dat_cache_file, md5);
        return InitIdfAttachDat(dat_cache_file, md5);
    }
    // Builds the pinyin cache file from elements, then attaches it.
    // Returns the result of the attach step.
    bool InitBuildDat(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
        BuildDatCache(elements, dat_cache_file, md5);
        return InitPinYinAttachDat(dat_cache_file, md5);
    }
    bool InitAttachDat(const string & dat_cache_file, const string & md5) {
        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
        if (mmap_fd_ < 0) {
            return false;
        }
        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
        assert(seek_off >= 0);
        mmap_length_ = seek_off;
        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
        assert(MAP_FAILED != mmap_addr_);
        assert(mmap_length_ >= sizeof(CacheFileHeader));
        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
        elements_num_ = header.elements_num;
        min_weight_ = header.min_weight;
        assert(sizeof(header.md5_hex) == md5.size());
        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
            return false;
        }
        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem)  + header.dat_size * dat_.unit_size());
        elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
    bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
        if (mmap_fd_ < 0) {
            return false;
        }
        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
        assert(seek_off >= 0);
        mmap_length_ = seek_off;
        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
        assert(MAP_FAILED != mmap_addr_);
        assert(mmap_length_ >= sizeof(CacheFileHeader));
        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
        elements_num_ = header.elements_num;
        min_weight_ = header.min_weight;
        assert(sizeof(header.md5_hex) == md5.size());
        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
            return false;
        }
        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double)  + header.dat_size * dat_.unit_size());
        idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
    bool InitPinYinAttachDat(const string & dat_cache_file, const string & md5) {
        mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
        if (mmap_fd_ < 0) {
            return false;
        }
        const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
        assert(seek_off >= 0);
        mmap_length_ = seek_off;
        mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
        assert(MAP_FAILED != mmap_addr_);
        assert(mmap_length_ >= sizeof(CacheFileHeader));
        CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
        elements_num_ = header.elements_num;
        min_weight_ = header.min_weight;
        assert(sizeof(header.md5_hex) == md5.size());
        if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
            return false;
        }
        assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(PinYinMemElem)  + header.dat_size * dat_.unit_size());
        pinyin_elements_ptr_ = (const PinYinMemElem *)(mmap_addr_ + sizeof(header));
        const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(PinYinMemElem) * elements_num_;
        dat_.set_array(dat_ptr, header.dat_size);
        return true;
    }
 private:
    void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
        std::sort(elements.begin(), elements.end());
        vector<const char*> keys_ptr_vec;
        vector<int> values_vec;
        vector<DatMemElem> mem_elem_vec;
        keys_ptr_vec.reserve(elements.size());
        values_vec.reserve(elements.size());
        mem_elem_vec.reserve(elements.size());
        CacheFileHeader header;
        header.min_weight = min_weight_;
        assert(sizeof(header.md5_hex) == md5.size());
        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
        for (size_t i = 0; i < elements.size(); ++i) {
            keys_ptr_vec.push_back(elements[i].word.data());
            values_vec.push_back(i);
            mem_elem_vec.push_back(DatMemElem());
            auto & mem_elem = mem_elem_vec.back();
            mem_elem.weight = elements[i].weight;
            mem_elem.SetTag(elements[i].tag);
        }
        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
        assert(0 == ret);
        header.elements_num = mem_elem_vec.size();
        header.dat_size = dat_.size();
        {
            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
            ::umask(S_IWGRP | S_IWOTH);
            //const int fd =::mkstemp(&tmp_filepath[0]);
            //原mkstemp用法有误，已修复--jxx20210519
            const int fd =::mkstemp((char *)tmp_filepath.data());
            qDebug() << "mkstemp :" << errno << tmp_filepath.data();
            assert(fd >= 0);
            ::fchmod(fd, 0644);
            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
            ::close(fd);
            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
            assert(0 == rename_ret);
        }
    }
    void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
        std::sort(elements.begin(), elements.end());
        vector<const char*> keys_ptr_vec;
        vector<int> values_vec;
        vector<double> mem_elem_vec;
        keys_ptr_vec.reserve(elements.size());
        values_vec.reserve(elements.size());
        mem_elem_vec.reserve(elements.size());
        CacheFileHeader header;
        header.min_weight = min_weight_;
        assert(sizeof(header.md5_hex) == md5.size());
        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
        for (size_t i = 0; i < elements.size(); ++i) {
            keys_ptr_vec.push_back(elements[i].word.data());
            values_vec.push_back(i);
            mem_elem_vec.push_back(elements[i].idf);
        }
        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
        assert(0 == ret);
        header.elements_num = mem_elem_vec.size();
        header.dat_size = dat_.size();
        {
            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
            ::umask(S_IWGRP | S_IWOTH);
            //const int fd =::mkstemp(&tmp_filepath[0]);
            //原mkstemp用法有误，已修复--jxx20210519
            const int fd =::mkstemp((char *)tmp_filepath.data());
            qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
            assert(fd >= 0);
            ::fchmod(fd, 0644);
            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size());
            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size());
            ::close(fd);
            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
            assert(0 == rename_ret);
        }
    }
    void BuildDatCache(vector<PinYinElement>& elements, const string & dat_cache_file, const string & md5) {
        //std::sort(elements.begin(), elements.end());
        vector<const char*> keys_ptr_vec;
        vector<int> values_vec;
        vector<PinYinMemElem> mem_elem_vec;
        keys_ptr_vec.reserve(elements.size());
        values_vec.reserve(elements.size());
        mem_elem_vec.reserve(elements.size());
        CacheFileHeader header;
        header.min_weight = min_weight_;
        assert(sizeof(header.md5_hex) == md5.size());
        memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
        for (size_t i = 0; i < elements.size(); ++i) {
            keys_ptr_vec.push_back(elements[i].word.data());
            values_vec.push_back(i);
            mem_elem_vec.push_back(PinYinMemElem());
            auto & mem_elem = mem_elem_vec.back();
            mem_elem.SetTag(elements[i].tag);
        }
        auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
        assert(0 == ret);
        header.elements_num = mem_elem_vec.size();
        header.dat_size = dat_.size();
        {
            string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
            ::umask(S_IWGRP | S_IWOTH);
            //const int fd =::mkstemp(&tmp_filepath[0]);
            const int fd =::mkstemp((char *)tmp_filepath.data());
            qDebug() << "mkstemp :" << errno << tmp_filepath.data();
            assert(fd >= 0);
            ::fchmod(fd, 0644);
            auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
            write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
            write_bytes += ::write(fd, dat_.array(), dat_.total_size());
            assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
            ::close(fd);
            const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
            assert(0 == rename_ret);
        }
    }
    DatTrie(const DatTrie &);
    DatTrie &operator=(const DatTrie &);
 private:
    JiebaDAT dat_;                                        // double-array trie used for all lookups
    const DatMemElem * elements_ptr_ = nullptr;           // records of an attached word-dictionary cache
    const double * idf_elements_ptr_ = nullptr;           // records of an attached IDF cache
    const PinYinMemElem * pinyin_elements_ptr_ = nullptr; // records of an attached pinyin cache
    size_t elements_num_ = 0;                             // record count taken from the cache header
    double min_weight_ = 0;                               // weight used for runes without a dictionary entry
    int mmap_fd_ = -1;                                    // descriptor of the mapped cache file, -1 if none
    size_t mmap_length_ = 0;                              // length of the mapping in bytes
    char * mmap_addr_ = nullptr;                          // start of the mapping, nullptr if none
 };
 /**
  * Computes one MD5 digest over the contents of several files.
  *
  * @param files_list file paths separated by '|' or ';'
  * @param file_size_sum receives the total number of bytes that were hashed
  * @return the digest as produced by limonp::MD5 (digestChars)
  *
  * Files that cannot be opened, are empty, or cannot be mapped are skipped.
  * A mapping failure is handled at run time instead of only by assert, so a
  * release build never hashes through an invalid address.
  */
 inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
    limonp::MD5 md5;
    const auto files = limonp::Split(files_list, "|;");
    file_size_sum = 0;
    for (auto const & local_path : files) {
        const int fd = ::open(local_path.c_str(), O_RDONLY);
        if( fd < 0){
            continue;
        }
        auto const len = ::lseek(fd, 0, SEEK_END);
        if (len > 0) {
            void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
            if (MAP_FAILED != addr) {
                md5.Update((unsigned char *) addr, len);
                file_size_sum += len;
                ::munmap(addr, len);
            }
        }
        ::close(fd);
    }
    md5.Final();
    return string(md5.digestChars);
 }
 }
--- a/libchinese-segmentation/cppjieba/DictTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DictTrie.hpp
@ -1,235 +0,0 @@
 #pragma once
 #include <iostream>
 #include <fstream>
 #include <map>
 #include <string>
 #include <cstring>
 #include <cstdlib>
 #include <stdint.h>
 #include <cmath>
 #include <limits>
 #include "limonp/StringUtil.hpp"
 #include "limonp/Logging.hpp"
 #include "Unicode.hpp"
 #include "DatTrie.hpp"
 #include <QDebug>
 namespace cppjieba {
 using namespace limonp;
 // Very small sentinel used as the initial/"impossible" score when searching
 // for a maximum (see HMMSegment::Viterbi and MPSegment::CalcDP).
 const double MIN_DOUBLE = -3.14e+100;
 // Counterpart of MIN_DOUBLE for the upper end of the range.
 const double MAX_DOUBLE = 3.14e+100;
 // Number of space-separated columns every line of the default dictionary
 // must have (checked in DictTrie::LoadDefaultDict).
 const size_t DICT_COLUMN_NUM = 3;
 // Tag given to user-dictionary words that do not carry an explicit tag.
 const char* const UNKNOWN_TAG = "";
 /**
  * Word dictionary backed by a DatTrie.
  *
  * On construction the default dictionary and the optional user dictionaries
  * are either attached from an existing dat cache file (matched by the MD5 of
  * the dictionary files) or parsed and built into a new cache file.
  */
 class DictTrie {
 public:
    /// Selects which weight of the default dictionary is given to user words
    /// that do not specify a frequency of their own.
    enum UserWordWeightOption {
        WordWeightMin,
        WordWeightMedian,
        WordWeightMax,
    }; // enum UserWordWeightOption

    /**
     * @param dict_path             Path of the default dictionary file.
     * @param user_dict_paths       User dictionary paths, split on "|;".
     * @param dat_cache_path        Cache file location; when empty a path
     *                              under /tmp is derived from the MD5.
     * @param user_word_weight_opt  Default weight policy for user words.
     */
    DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
        Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
    }
    ~DictTrie() {}

    /// Looks up a single word; forwards to DatTrie::Find.
    const DatMemElem* Find(const string & word) const {
        return dat_.Find(word);
    }

    /// Fills `res` with DAG information for the rune range; forwards to DatTrie::Find.
    void Find(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<struct DatDag>&res,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        dat_.Find(begin, end, res, max_word_len);
    }

    /// Fills `words` with word ranges for the rune range; forwards to DatTrie::Find.
    void Find(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<WordRange>& words,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        dat_.Find(begin, end, words, max_word_len);
    }

    /// True when `word` was loaded from a user dictionary as a one-rune word.
    bool IsUserDictSingleChineseWord(const Rune& word) const {
        return IsIn(user_dict_single_chinese_word_, word);
    }

    double GetMinWeight() const {
        return dat_.GetMinWeight();
    }

    /// Total size in bytes of the dictionary files hashed during Init.
    size_t GetTotalDictSize() const {
        return total_dict_size_;
    }

    /**
     * Parses one user-dictionary line ("word", "word tag" or "word freq tag").
     *
     * @param line          The raw line; columns are separated by a space.
     * @param saveNodeInfo  When true the parsed node is appended to
     *                      static_node_infos_ for the trie build; when false
     *                      only the single-rune word set is updated.
     */
    void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
        vector<string> buf;
        DatElement node_info;
        Split(line, buf, " ");
        if (buf.size() == 0) {
            return;
        }
        node_info.word = buf[0];
        node_info.weight = user_word_default_weight_;
        node_info.tag = UNKNOWN_TAG;
        if (buf.size() == 2) {
            node_info.tag = buf[1];
        } else if (buf.size() == 3) {
            // A frequency can only be turned into a log-probability once the
            // default dictionary's frequency sum is known.
            if (freq_sum_ > 0.0) {
                const int freq = atoi(buf[1].c_str());
                node_info.weight = log(1.0 * freq / freq_sum_);
                node_info.tag = buf[2];
            }
        }
        if (saveNodeInfo) {
            static_node_infos_.push_back(node_info);
        }
        if (Utf8CharNum(node_info.word) == 1) {
            RuneArray word;
            if (DecodeRunesInString(node_info.word, word)) {
                user_dict_single_chinese_word_.insert(word[0]);
            } else {
                XLOG(ERROR) << "Decode " << node_info.word << " failed.";
            }
        }
    }

    /**
     * Loads every user dictionary named in `filePaths` (split on "|;"),
     * feeding each non-empty line to InserUserDictNode.
     */
    void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
        vector<string> files = limonp::Split(filePaths, "|;");
        for (size_t i = 0; i < files.size(); i++) {
            ifstream ifs(files[i].c_str());
            XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
            string line;
            for (; getline(ifs, line);) {
                if (line.size() == 0) {
                    continue;
                }
                InserUserDictNode(line, saveNodeInfo);
            }
        }
    }

 private:
    /**
     * Attaches to an existing dat cache if one matches the dictionaries' MD5,
     * otherwise parses the dictionaries and builds a new cache file.
     */
    void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
              UserWordWeightOption user_word_weight_opt) {
        const auto dict_list = dict_path + "|" + user_dict_paths;
        size_t file_size_sum = 0;
        const string md5 = CalcFileListMD5(dict_list, file_size_sum);
        total_dict_size_ = file_size_sum;
        if (dat_cache_path.empty()) {
            // When no cache location is given, the dat cache file is stored
            // under /tmp by default (jxx, 2020-05-19).
            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
        }
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Dict path:" << path;
        if (dat_.InitAttachDat(dat_cache_path, md5)) {
            // Cache hit: the trie is ready, but the single-rune user word set
            // is not part of it, so the user dictionaries are re-read without
            // storing node infos.
            LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
            return;
        }
        LoadDefaultDict(dict_path);
        freq_sum_ = CalcFreqSum(static_node_infos_);
        CalculateWeight(static_node_infos_, freq_sum_);
        double min_weight = 0;
        SetStaticWordWeights(user_word_weight_opt, min_weight);
        dat_.SetMinWeight(min_weight);
        LoadUserDict(user_dict_paths);
        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
        assert(build_ret);
        // swap with an empty vector to actually release the memory
        vector<DatElement>().swap(static_node_infos_);
    }

    /// Reads the default dictionary; every line must have DICT_COLUMN_NUM columns
    /// (word, frequency, tag).
    void LoadDefaultDict(const string& filePath) {
        ifstream ifs(filePath.c_str());
        XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
        string line;
        vector<string> buf;
        for (; getline(ifs, line);) {
            Split(line, buf, " ");
            XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
            DatElement node_info;
            node_info.word = buf[0];
            node_info.weight = atof(buf[1].c_str());
            node_info.tag = buf[2];
            static_node_infos_.push_back(node_info);
        }
    }

    static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
        return lhs.weight < rhs.weight;
    }

    /**
     * Derives the default user-word weight from the loaded dictionary.
     *
     * @param option      Which of min / median / max weight to use.
     * @param min_weight  Output: the smallest weight in static_node_infos_.
     */
    void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
        XCHECK(!static_node_infos_.empty());
        vector<DatElement> x = static_node_infos_;
        sort(x.begin(), x.end(), WeightCompare);
        if(x.empty()){
            return;
        }
        min_weight = x[0].weight;
        const double max_weight_ = x[x.size() - 1].weight;
        const double median_weight_ = x[x.size() / 2].weight;
        switch (option) {
            case WordWeightMin:
                user_word_default_weight_ = min_weight;
                break;
            case WordWeightMedian:
                user_word_default_weight_ = median_weight_;
                break;
            default:
                user_word_default_weight_ = max_weight_;
                break;
        }
    }

    /// Sum of the (still raw) weights of all nodes.
    double CalcFreqSum(const vector<DatElement>& node_infos) const {
        double sum = 0.0;
        for (size_t i = 0; i < node_infos.size(); i++) {
            sum += node_infos[i].weight;
        }
        return sum;
    }

    /// Replaces every raw weight w by log(w / sum).
    void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
        for (size_t i = 0; i < node_infos.size(); i++) {
            DatElement& node_info = node_infos[i];
            assert(node_info.weight > 0.0);
            node_info.weight = log(double(node_info.weight) / sum);
        }
    }

 private:
    vector<DatElement> static_node_infos_;
    size_t total_dict_size_ = 0;
    DatTrie dat_;
    // Both values are read by InserUserDictNode on the cache-hit path of
    // Init(), before anything assigns them, so they need a defined start value.
    double freq_sum_ = 0.0;
    double user_word_default_weight_ = 0.0;
    unordered_set<Rune> user_dict_single_chinese_word_;
 };
 }
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@ -1,58 +0,0 @@
 #pragma once
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "Unicode.hpp"
 namespace cppjieba {
 /**
  * Full-mode segmenter: emits every dictionary word found in the input,
  * plus single runes that no longer word covers.
  */
 class FullSegment: public SegmentBase {
 public:
    FullSegment(const DictTrie* dictTrie)
        : dictTrie_(dictTrie) {
        assert(dictTrie_);
    }
    ~FullSegment() { }

    /**
     * Appends to `res` the word ranges found between `begin` and `end`.
     * The bool and size_t parameters of the base interface are ignored.
     */
    virtual void Cut(RuneStrArray::const_iterator begin,
                     RuneStrArray::const_iterator end,
                     vector<WordRange>& res, bool, size_t) const override {
        assert(dictTrie_);
        vector<struct DatDag> dags;
        dictTrie_->Find(begin, end, dags);

        // One past the right-most rune covered by any edge seen so far.
        size_t covered_until = 0;
        for (size_t start = 0; start < dags.size(); ++start) {
            const auto & edges = dags[start].nexts;
            const bool has_single_edge = (edges.size() == 1);
            for (const auto & edge : edges) {
                const size_t last = edge.first - 1;
                assert(last < dags.size());
                const auto rune_count = last - start + 1;
                const bool in_dict = (edge.second != nullptr); // false => Out-of-Vocabulary
                const bool uncovered_single = has_single_edge && (covered_until <= start);
                if (uncovered_single || (in_dict && rune_count >= 2)) {
                    res.push_back(WordRange(begin + start, begin + last));
                }
                if (covered_until < last + 1) {
                    covered_until = last + 1;
                }
            }
        }
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
    }

 private:
    const DictTrie* dictTrie_;
 };
 }
--- a/libchinese-segmentation/cppjieba/HMMModel.hpp
+++ b/libchinese-segmentation/cppjieba/HMMModel.hpp
@ -1,140 +0,0 @@
 #pragma once
 #include "limonp/StringUtil.hpp"
 namespace cppjieba {
 using namespace limonp;
 typedef unordered_map<Rune, double> EmitProbMap;
 struct HMMModel {
    /*
     * STATUS:
     * 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
     * */
    enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
    HMMModel(const string& modelPath) {
        memset(startProb, 0, sizeof(startProb));
        memset(transProb, 0, sizeof(transProb));
        statMap[0] = 'B';
        statMap[1] = 'E';
        statMap[2] = 'M';
        statMap[3] = 'S';
        emitProbVec.push_back(&emitProbB);
        emitProbVec.push_back(&emitProbE);
        emitProbVec.push_back(&emitProbM);
        emitProbVec.push_back(&emitProbS);
        LoadModel(modelPath);
    }
    ~HMMModel() {
    }
    void LoadModel(const string& filePath) {
        ifstream ifile(filePath.c_str());
        XCHECK(ifile.is_open()) << "open " << filePath << " failed";
        string line;
        vector<string> tmp;
        vector<string> tmp2;
        //Load startProb
        XCHECK(GetLine(ifile, line));
        Split(line, tmp, " ");
        XCHECK(tmp.size() == STATUS_SUM);
        for (size_t j = 0; j < tmp.size(); j++) {
            startProb[j] = atof(tmp[j].c_str());
        }
        //Load transProb
        for (size_t i = 0; i < STATUS_SUM; i++) {
            XCHECK(GetLine(ifile, line));
            Split(line, tmp, " ");
            XCHECK(tmp.size() == STATUS_SUM);
            for (size_t j = 0; j < tmp.size(); j++) {
                transProb[i][j] = atof(tmp[j].c_str());
            }
        }
        //Load emitProbB
        XCHECK(GetLine(ifile, line));
        XCHECK(LoadEmitProb(line, emitProbB));
        //Load emitProbE
        XCHECK(GetLine(ifile, line));
        XCHECK(LoadEmitProb(line, emitProbE));
        //Load emitProbM
        XCHECK(GetLine(ifile, line));
        XCHECK(LoadEmitProb(line, emitProbM));
        //Load emitProbS
        XCHECK(GetLine(ifile, line));
        XCHECK(LoadEmitProb(line, emitProbS));
    }
    double GetEmitProb(const EmitProbMap* ptMp, Rune key,
                       double defVal)const {
        EmitProbMap::const_iterator cit = ptMp->find(key);
        if (cit == ptMp->end()) {
            return defVal;
        }
        return cit->second;
    }
    bool GetLine(ifstream& ifile, string& line) {
        while (getline(ifile, line)) {
            Trim(line);
            if (line.empty()) {
                continue;
            }
            if (StartsWith(line, "#")) {
                continue;
            }
            return true;
        }
        return false;
    }
    bool LoadEmitProb(const string& line, EmitProbMap& mp) {
        if (line.empty()) {
            return false;
        }
        vector<string> tmp, tmp2;
        RuneArray unicode;
        Split(line, tmp, ",");
        for (size_t i = 0; i < tmp.size(); i++) {
            Split(tmp[i], tmp2, ":");
            if (2 != tmp2.size()) {
                XLOG(ERROR) << "emitProb illegal.";
                return false;
            }
            if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
                XLOG(ERROR) << "TransCode failed.";
                return false;
            }
            mp[unicode[0]] = atof(tmp2[1].c_str());
        }
        return true;
    }
    char statMap[STATUS_SUM];
    double startProb[STATUS_SUM];
    double transProb[STATUS_SUM][STATUS_SUM];
    EmitProbMap emitProbB;
    EmitProbMap emitProbE;
    EmitProbMap emitProbM;
    EmitProbMap emitProbS;
    vector<EmitProbMap* > emitProbVec;
 }; // struct HMMModel
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@ -1,195 +0,0 @@
 #pragma once
 #include <iostream>
 #include <fstream>
 #include <memory.h>
 #include <cassert>
 #include "HMMModel.hpp"
 #include "SegmentBase.hpp"
 namespace cppjieba {
 /**
  * Segmenter that labels runes with the HMM states B/E/M/S via Viterbi and
  * cuts after every E or S. Runs of ASCII letters/digits are kept together
  * by simple rules instead of going through the model.
  */
 class HMMSegment: public SegmentBase {
 public:
    HMMSegment(const HMMModel* model)
        : model_(model) {
    }
    ~HMMSegment() { }

    /**
     * Appends the word ranges of [begin, end) to `res`.
     * The bool and size_t parameters of the base interface are ignored.
     */
    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
                     size_t) const override {
        RuneStrArray::const_iterator left = begin;
        RuneStrArray::const_iterator right = begin;
        while (right != end) {
            if (right->rune < 0x80) { // ASCII code point
                // Flush the pending non-ASCII run through the model first.
                if (left != right) {
                    InternalCut(left, right, res);
                }
                left = right;
                do {
                    // Returns left if it is not an English letter, otherwise
                    // the position just after the letter/digit run.
                    right = SequentialLetterRule(left, end);
                    if (right != left) {
                        break;
                    }
                    // Returns left if it is not a digit, otherwise the
                    // position just after the number.
                    right = NumbersRule(left, end);
                    if (right != left) {
                        break;
                    }
                    // Any other ASCII rune becomes a word of its own.
                    right ++;
                } while (false);
                WordRange wr(left, right - 1);
                res.push_back(wr);
                left = right;
            } else {
                right++;
            }
        }
        if (left != right) {
            InternalCut(left, right, res);
        }
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
    }

 private:
    /// Sequential letters rule: a letter followed by any number of letters
    /// or digits. Returns `begin` unchanged when it is not on a letter.
    /// `begin` must be dereferenceable.
    RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
                                                      RuneStrArray::const_iterator end) const {
        Rune x = begin->rune;
        if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
            begin ++;
        } else {
            return begin;
        }
        while (begin != end) {
            x = begin->rune;
            if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
                begin ++;
            } else {
                break;
            }
        }
        return begin;
    }

    /// Numbers rule: a digit followed by any number of digits or '.'.
    /// Returns `begin` unchanged when it is not on a digit.
    /// `begin` must be dereferenceable.
    RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
        Rune x = begin->rune;
        if ('0' <= x && x <= '9') {
            begin ++;
        } else {
            return begin;
        }
        while (begin != end) {
            x = begin->rune;
            if (('0' <= x && x <= '9') || x == '.') {
                begin++;
            } else {
                break;
            }
        }
        return begin;
    }

    /// Runs Viterbi over [begin, end) and appends one WordRange per E/S state.
    void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
        vector<size_t> status;
        Viterbi(begin, end, status);
        RuneStrArray::const_iterator left = begin;
        RuneStrArray::const_iterator right;
        for (size_t i = 0; i < status.size(); i++) {
            // E (1) and S (3) are the odd states, i.e. the ones ending a word.
            if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
                right = begin + i + 1;
                WordRange wr(left, right - 1);
                res.push_back(wr);
                left = right;
            }
        }
    }

    /**
     * Computes the most likely state sequence for the runes in [begin, end).
     *
     * @param status  Output: one state (HMMModel::B/E/M/S) per rune; empty
     *                when the range is empty.
     *
     * The score tables are laid out state-major: cell (x, y) is at x + y * X.
     */
    void Viterbi(RuneStrArray::const_iterator begin,
                 RuneStrArray::const_iterator end,
                 vector<size_t>& status) const {
        const size_t Y = HMMModel::STATUS_SUM;
        const size_t X = end - begin;
        if (X == 0) {
            // Nothing to label; also avoids dereferencing `begin` below.
            status.clear();
            return;
        }
        // Heap-backed tables: variable-length arrays are not standard C++
        // and would put X * Y * 12 bytes on the stack for long inputs.
        vector<int> path(X * Y);
        vector<double> weight(X * Y);
        //start
        for (size_t y = 0; y < Y; y++) {
            weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
            path[0 + y * X] = -1;
        }
        for (size_t x = 1; x < X; x++) {
            for (size_t y = 0; y < Y; y++) {
                const size_t now = x + y * X;
                weight[now] = MIN_DOUBLE;
                path[now] = HMMModel::E; // warning
                const double emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
                for (size_t preY = 0; preY < Y; preY++) {
                    const size_t old = x - 1 + preY * X;
                    const double tmp = weight[old] + model_->transProb[preY][y] + emitProb;
                    if (tmp > weight[now]) {
                        weight[now] = tmp;
                        path[now] = static_cast<int>(preY);
                    }
                }
            }
        }
        // A valid sequence can only end in E or S.
        const double endE = weight[X - 1 + HMMModel::E * X];
        const double endS = weight[X - 1 + HMMModel::S * X];
        size_t stat = HMMModel::S;
        if (endE >= endS) {
            stat = HMMModel::E;
        }
        // Walk the back-pointers from the last rune to the first.
        status.resize(X);
        for (size_t x = X; x-- > 0;) {
            status[x] = stat;
            stat = path[x + stat * X];
        }
    }

    const HMMModel* model_;
 }; // class HMMSegment
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/IdfTrie.hpp
+++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp
@ -1,117 +0,0 @@
 #pragma once
 #include <iostream>
 #include <fstream>
 #include <map>
 #include <string>
 #include <cstring>
 #include <cstdlib>
 #include <stdint.h>
 #include <cmath>
 #include <limits>
 #include "limonp/StringUtil.hpp"
 #include "limonp/Logging.hpp"
 #include "Unicode.hpp"
 #include "DatTrie.hpp"
 #include <QDebug>
 namespace cppjieba {
 using namespace limonp;
 const size_t IDF_COLUMN_NUM = 2;
 /**
  * IDF (inverse document frequency) dictionary backed by a DatTrie.
  *
  * The trie is either attached from a dat cache file matching the MD5 of the
  * IDF file or built from the IDF file and written to a new cache file.
  */
 class IdfTrie {
 public:
    enum UserWordWeightOption {
        WordWeightMin,
        WordWeightMedian,
        WordWeightMax,
    }; // enum UserWordWeightOption

    /**
     * @param dict_path             Path of the IDF file ("word idf" per line).
     * @param dat_cache_path        Cache file location; when empty a path
     *                              under /tmp is derived from the MD5.
     * @param user_word_weight_opt  Only used as part of the default cache
     *                              file name.
     */
    IdfTrie(const string& dict_path, const string & dat_cache_path = "",
             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
        Init(dict_path, dat_cache_path, user_word_weight_opt);
    }
    ~IdfTrie() {}

    /// Looks up the IDF of `word`; forwards to DatTrie::Find.
    double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
        return dat_.Find(word, length, node_pos);
    }

    /// Size in bytes of the IDF file(s) hashed during Init.
    size_t GetTotalDictSize() const {
        return total_dict_size_;
    }

 private:
    void Init(const string& dict_path, string dat_cache_path,
              UserWordWeightOption user_word_weight_opt) {
        size_t file_size_sum = 0;
        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
        total_dict_size_ = file_size_sum;
        if (dat_cache_path.empty()) {
            // When no cache location is given, the dat cache file is stored
            // under /tmp by default (jxx, 2020-05-19).
            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
        }
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Idf path:" << path;
        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
            // Attaching restores the trie only. idfAverage_ lives in this
            // class and is read by KeywordExtractor, so it has to be
            // recomputed from the IDF file on a cache hit as well.
            LoadDefaultIdf(dict_path);
            if (!static_node_infos_.empty()) {
                idfAverage_ = CalcIdfSum(static_node_infos_) / static_node_infos_.size();
            }
            vector<IdfElement>().swap(static_node_infos_);
            return;
        }
        LoadDefaultIdf(dict_path);
        double idf_sum_ = CalcIdfSum(static_node_infos_);
        assert(static_node_infos_.size());
        idfAverage_ = idf_sum_ / static_node_infos_.size();
        assert(idfAverage_ > 0.0);
        double min_weight = 0;
        dat_.SetMinWeight(min_weight);
        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
        assert(build_ret);
        // swap with an empty vector to actually release the memory
        vector<IdfElement>().swap(static_node_infos_);
    }

    /// Parses the IDF file into static_node_infos_. A file that cannot be
    /// opened is silently ignored; empty lines are logged and skipped.
    void LoadDefaultIdf(const string& filePath) {
        ifstream ifs(filePath.c_str());
        if(not ifs.is_open()){
            return ;
        }
        string line;
        vector<string> buf;
        size_t lineno = 0;
        for (; getline(ifs, line); lineno++) {
            if (line.empty()) {
                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
                continue;
            }
            Split(line, buf, " ");
            XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
            IdfElement node_info;
            node_info.word = buf[0];
            node_info.idf = atof(buf[1].c_str());
            static_node_infos_.push_back(node_info);
        }
    }

    /// Sum of the idf values of all nodes.
    double CalcIdfSum(const vector<IdfElement>& node_infos) const {
        double sum = 0.0;
        for (size_t i = 0; i < node_infos.size(); i++) {
            sum += node_infos[i].idf;
        }
        return sum;
    }

 public:
    // Mean IDF over the whole IDF file; used as the IDF of words that are
    // not in the trie. Stays 0.0 if the IDF file could not be read.
    double idfAverage_ = 0.0;

 private:
    vector<IdfElement> static_node_infos_;
    size_t total_dict_size_ = 0;
    DatTrie dat_;
 };
 }
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@ -1,101 +0,0 @@
 #pragma once
 #include <memory>
 #include "QuerySegment.hpp"
 #include "KeywordExtractor.hpp"
 namespace cppjieba {
 /**
  * Facade that owns one dictionary trie and one HMM model and exposes the
  * individual segmenters (MP, HMM, Mix, Full, Query) plus the keyword
  * extractor through a single object.
  */
 class Jieba {
 public:
    /**
     * @param dict_path       Default dictionary file.
     * @param model_path      HMM model file.
     * @param user_dict_path  User dictionary path(s), passed to DictTrie.
     * @param idfPath         IDF file, passed to the keyword extractor.
     * @param stopWordPath    Stop word file, passed to the Mix/Query segmenters
     *                        and the keyword extractor.
     * @param dat_cache_path  Cache location, passed to DictTrie and the
     *                        keyword extractor.
     *
     * The initializer list follows the member declaration order: the
     * segmenters take pointers to dict_trie_ and model_, which therefore
     * have to be constructed first.
     */
    Jieba(const string& dict_path,
          const string& model_path,
          const string& user_dict_path,
          const string& idfPath = "",
          const string& stopWordPath = "",
          const string& dat_cache_path = "")
        : dict_trie_(dict_path, user_dict_path, dat_cache_path),
          model_(model_path),
          mp_seg_(&dict_trie_),
          hmm_seg_(&model_),
          mix_seg_(&dict_trie_, &model_, stopWordPath),
          full_seg_(&dict_trie_),
          query_seg_(&dict_trie_, &model_, stopWordPath),
          extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
    ~Jieba() { }
    // Mix segmentation; `hmm` is forwarded to the Mix segmenter.
    void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
        mix_seg_.CutToStr(sentence, words, hmm);
    }
    void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
        mix_seg_.CutToWord(sentence, words, hmm);
    }
    // Full segmentation.
    void CutAll(const string& sentence, vector<string>& words) const {
        full_seg_.CutToStr(sentence, words);
    }
    void CutAll(const string& sentence, vector<Word>& words) const {
        full_seg_.CutToWord(sentence, words);
    }
    // Query (search-engine style) segmentation.
    void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
        query_seg_.CutToStr(sentence, words, hmm);
    }
    void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
        query_seg_.CutToWord(sentence, words, hmm);
    }
    // HMM-only segmentation.
    void CutHMM(const string& sentence, vector<string>& words) const {
        hmm_seg_.CutToStr(sentence, words);
    }
    void CutHMM(const string& sentence, vector<Word>& words) const {
        hmm_seg_.CutToWord(sentence, words);
    }
    // MP segmentation with an upper bound on the word length.
    void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
        mp_seg_.CutToStr(sentence, words, false, max_word_len);
    }
    void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
        mp_seg_.CutToWord(sentence, words, false, max_word_len);
    }
    // Segments the sentence with the Mix segmenter and tags each word.
    void Tag(const string& sentence, vector<pair<string, string> >& words) const {
        mix_seg_.Tag(sentence, words);
    }
    // Looks up the tag of a single word via the Mix segmenter.
    string LookupTag(const string &str) const {
        return mix_seg_.LookupTag(str);
    }
    // True when `word` is present in the dictionary trie.
    bool Find(const string& word) {
        return nullptr != dict_trie_.Find(word);
    }
    // Applies the same separator set to every segmenter (the keyword
    // extractor is not touched here).
    void ResetSeparators(const string& s) {
        //TODO
        mp_seg_.ResetSeparators(s);
        hmm_seg_.ResetSeparators(s);
        mix_seg_.ResetSeparators(s);
        full_seg_.ResetSeparators(s);
        query_seg_.ResetSeparators(s);
    }
    const DictTrie* GetDictTrie() const {
        return &dict_trie_;
    }
    const HMMModel* GetHMMModel() const {
        return &model_;
    }
 private:
    DictTrie dict_trie_;
    HMMModel model_;
    // They share the same dict trie and model
    MPSegment mp_seg_;
    HMMSegment hmm_seg_;
    MixSegment mix_seg_;
    FullSegment full_seg_;
    QuerySegment query_seg_;
 public:
    KeywordExtractor extractor;
 }; // class Jieba
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@ -1,99 +0,0 @@
 #pragma once
 #include <cmath>
 #include "MixSegment.hpp"
 #include "IdfTrie.hpp"
 namespace cppjieba {
 using namespace limonp;
 using namespace std;
 /*utf8*/
 class KeywordExtractor {
 public:
    KeywordExtractor(const DictTrie* dictTrie,
                     const HMMModel* model,
                     const string& idfPath,
                     const string& dat_cache_path,
                     const string& stopWordPath)
        : segment_(dictTrie, model, stopWordPath),
        idf_trie_(idfPath,dat_cache_path){
    }
    ~KeywordExtractor() {
    }
    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
        vector<KeyWord> topWords;
        Extract(sentence, topWords, topN);
        for (size_t i = 0; i < topWords.size(); i++) {
            keywords.push_back(topWords[i].word);
        }
    }
    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
        vector<KeyWord> topWords;
        Extract(sentence, topWords, topN);
        for (size_t i = 0; i < topWords.size(); i++) {
            keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
        }
    }
    void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
        unordered_map<string, KeyWord> wordmap;//插入字符串与Word的map，相同string统计词频叠加权重
        PreFilter pre_filter(symbols_, sentence);
        RuneStrArray::const_iterator null_p;
        WordRange range(null_p, null_p);
        bool isNull(false);
        while (pre_filter.Next(range, isNull)) {
            if (isNull) {
                continue;
            }
            segment_.CutToStr(sentence, range,  wordmap);
        }
        keywords.clear();
        keywords.reserve(wordmap.size());
        for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
            double idf = idf_trie_.Find(itr->first);
            if (-1 != idf) {//IDF词典查找
                itr->second.weight *= idf;
            } else {
                itr->second.weight *= idf_trie_.idfAverage_;
            }
            itr->second.word = itr->first;
            keywords.push_back(itr->second);
        }
        topN = min(topN, keywords.size());
        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
        keywords.resize(topN);
    }
 private:
    static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
        return lhs.weight > rhs.weight;
    }
    MixSegment segment_;
    IdfTrie idf_trie_;
    unordered_set<Rune> symbols_;
 }; // class KeywordExtractor
 /// Streams a KeyWord as a JSON-like object with its word, offsets and weight.
 inline ostream& operator << (ostream& os, const KeyWord& word) {
    os << "{\"word\": \"" << word.word;
    os << "\", \"offset\": " << word.offsets;
    os << ", \"weight\": " << word.weight;
    os << "}";
    return os;
 }
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@ -1,129 +0,0 @@
 #pragma once
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentTagged.hpp"
 #include "PosTagger.hpp"
 namespace cppjieba {
 /**
  * Dictionary-based (maximum probability) segmenter with tagging support.
  */
 class MPSegment: public SegmentTagged {
 public:
    MPSegment(const DictTrie* dictTrie)
        : dictTrie_(dictTrie) {
        assert(dictTrie_);
    }
    ~MPSegment() { }

    /**
     * Appends the word ranges of [begin, end) to `words`, limited to words
     * of at most `max_word_len` runes. The bool parameter is ignored.
     */
    virtual void Cut(RuneStrArray::const_iterator begin,
                     RuneStrArray::const_iterator end,
                     vector<WordRange>& words,
                     bool, size_t max_word_len) const override {
        // Earlier DAG-based pipeline, kept for reference (jxx):
        //   vector<DatDag> dags;
        //   dictTrie_->Find(begin, end, dags, max_word_len); // build the DAG from the dictionary
        //   CalcDP(dags);                                     // dynamic programming: best path through the DAG
        //   CutByDag(begin, end, dags, words);                // cut along the best path
        dictTrie_->Find(begin, end, words, max_word_len);
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
    }

    /// Not supported by this segmenter; intentionally a no-op.
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
    }

    const DictTrie* GetDictTrie() const override {
        return dictTrie_;
    }

    /// Segments `src` with this segmenter and tags every word.
    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
        return tagger_.Tag(src, res, *this);
    }

    bool IsUserDictSingleChineseWord(const Rune& value) const {
        return dictTrie_->IsUserDictSingleChineseWord(value);
    }

 private:
    /**
     * Fills max_weight / max_next of every DAG node by walking the nodes
     * from last to first. This index-based loop replaced an earlier
     * reverse-iterator version; the author's note says initial testing
     * found no problems.
     */
    void CalcDP(vector<DatDag>& dags) const {
        const size_t total = dags.size();
        for (size_t idx = total; idx-- > 0;) {
            DatDag & node = dags[idx];
            node.max_next = -1;
            node.max_weight = MIN_DOUBLE;
            for (const auto & edge : node.nexts) {
                const auto nextPos = edge.first;
                // Out-of-vocabulary edges score the dictionary's minimum weight.
                double score = dictTrie_->GetMinWeight();
                if (edge.second != nullptr) {
                    score = edge.second->weight;
                }
                // Add the best score of the rest of the sentence, if any.
                if (nextPos  < dags.size()) {
                    score += dags[nextPos].max_weight;
                }
                if ((nextPos <= dags.size()) && (score > node.max_weight)) {
                    node.max_weight = score;
                    node.max_next = nextPos;
                }
            }
        }
    }

    /// Follows the max_next links computed by CalcDP and appends one
    /// WordRange per hop.
    void CutByDag(RuneStrArray::const_iterator begin,
                  RuneStrArray::const_iterator,
                  const vector<DatDag>& dags,
                  vector<WordRange>& words) const {
        size_t pos = 0;
        while (pos < dags.size()) {
            const auto next = dags[pos].max_next;
            assert(next > pos);
            assert(next <= dags.size());
            words.push_back(WordRange(begin + pos, begin + next - 1));
            pos = next;
        }
    }

    const DictTrie* dictTrie_;
    PosTagger tagger_;
 }; // class MPSegment
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@ -1,245 +0,0 @@
 #pragma once
 #include <cassert>
 #include "MPSegment.hpp"
 #include "HMMSegment.hpp"
 #include "limonp/StringUtil.hpp"
 #include "PosTagger.hpp"
 namespace cppjieba {
 class MixSegment: public SegmentTagged {
 public:
    MixSegment(const DictTrie* dictTrie,
               const HMMModel* model,
               const string& stopWordPath)
        : mpSeg_(dictTrie), hmmSeg_(model) {
        LoadStopWordDict(stopWordPath);
    }
    ~MixSegment() {}
    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
                     size_t) const override {
        if (!hmm) {
            mpSeg_.CutRuneArray(begin, end, res);
            return;
        }
        vector<WordRange> words;
        assert(end >= begin);
        words.reserve(end - begin);
        mpSeg_.CutRuneArray(begin, end, words);
        vector<WordRange> hmmRes;
        hmmRes.reserve(end - begin);
        for (size_t i = 0; i < words.size(); i++) {
            //if mp Get a word, it's ok, put it into result
            if (words[i].left != words[i].right || (words[i].left == words[i].right &&
                                                    mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
                res.push_back(words[i]);
                continue;
            }
            // if mp Get a single one and it is not in userdict, collect it in sequence
            size_t j = i;
            while (j < words.size() && words[j].left == words[j].right &&
                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
                j++;
            }
            // Cut the sequence with hmm
            assert(j - 1 >= i);
            // TODO
            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
            //put hmm result to result
            for (size_t k = 0; k < hmmRes.size(); k++) {
                res.push_back(hmmRes[k]);
            }
            //clear tmp vars
            hmmRes.clear();
            //let i jump over this piece
            i = j - 1;
        }
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
        //目前hmm默认开启，后期如有需要关闭再修改--jxx20210519
 //        if (!hmm) {
 //            mpSeg_.CutRuneArray(begin, end, res);
 //            return;
 //        }
        vector<WordRange> words;
        assert(end >= begin);
        words.reserve(end - begin);
        mpSeg_.CutRuneArray(begin, end, words);
        vector<WordRange> hmmRes;
        hmmRes.reserve(end - begin);
        for (size_t i = 0; i < words.size(); i++) {
            //if mp Get a word, it's ok, put it into result
            if (words[i].left != words[i].right) {
                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
                continue;
            }
            if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
                    || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
                continue;
            }
            // if mp Get a single one and it is not in userdict, collect it in sequence
            size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里（i字符不是最后一个字符），直接判定j字符
            while (j < (words.size() - 1) && words[j].left == words[j].right &&
                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
                j++;
            }
            // Cut the sequence with hmm
            assert(j - 1 >= i);
            // TODO
            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
            //put hmm result to result
            for (size_t k = 0; k < hmmRes.size(); k++) {
                res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
            }
            //clear tmp vars
            hmmRes.clear();
            //let i jump over this piece
            i = j - 1;
        }
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
        vector<WordRange> words;
        vector<WordRange> hmmRes;
        assert(end >= begin);
        if (3 == begin->len or 4 == begin->len) {
            words.reserve(end - begin);
            mpSeg_.CutRuneArray(begin, end, words);
            hmmRes.reserve(words.size());
        } else {
            hmmRes.reserve(end - begin);
        }
        if (words.size() != 0) {//存在中文分词结果
            for (size_t i = 0; i < words.size(); i++) {
                string str = GetStringFromRunes(s, words[i].left, words[i].right);
                if (words[i].left != words[i].right) {
                    if (stopWords_.find(str) != stopWords_.end()) {
                        continue;
                    }
                    res[str].offsets.push_back(words[i].left->offset);
                    res[str].weight += 1.0;
                    continue;
                }
                if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
                        || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
                    if (stopWords_.find(str) != stopWords_.end()) {
                        continue;
                    }
                    res[str].offsets.push_back(words[i].left->offset);
                    res[str].weight += 1.0;
                    continue;
                }
                // if mp Get a single one and it is not in userdict, collect it in sequence
                size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里（i字符不是最后一个字符），直接判定j字符
                bool isLastWordsSingle(false);
                while (j <= (words.size() - 1)
                       && words[j].left == words[j].right
                       && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
                    if (j == (words.size() - 1)) {//最后一个分词结果是单字
                        isLastWordsSingle = true;
                        break;
                    }
                    j++;
                }
                // Cut the sequence with hmm
                assert(j - 1 >= i);
                // TODO
                if (isLastWordsSingle) {
                    hmmSeg_.CutRuneArray(words[i].left, words[j].left + 1, hmmRes);
                } else {
                    hmmSeg_.CutRuneArray(words[i].left, words[j].left, hmmRes);
                }
                //put hmm result to result
                for (size_t k = 0; k < hmmRes.size(); k++) {
                    string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
                    if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
                        continue;
                    }
                    res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
                    res[hmmStr].weight += 1.0;
                }
                //clear tmp vars
                hmmRes.clear();
                //let i jump over this piece
                if (isLastWordsSingle) {
                    break;
                }
                i = j - 1;
            }
        } else {//不存在中文分词结果
            for (size_t i = 0; i < (size_t)(end - begin); i++) {
                string str = s.substr((begin+i)->offset, (begin+i)->len);
                res[str].offsets.push_back((begin+i)->offset);
                res[str].weight += 1.0;
            }
        }
    }
    const DictTrie* GetDictTrie() const override {
        return mpSeg_.GetDictTrie();
    }
    bool Tag(const string& src, vector<pair<string, string> >& res) const override {
        return tagger_.Tag(src, res, *this);
    }
    string LookupTag(const string &str) const {
        return tagger_.LookupTag(str, *this);
    }
    void LoadStopWordDict(const string& filePath) {
        ifstream ifs(filePath.c_str());
        if(not ifs.is_open()){
            return ;
        }
        XCHECK(ifs.is_open()) << "open " << filePath << " failed";
        string line ;
        while (getline(ifs, line)) {
            stopWords_.insert(line);
        }
        assert(stopWords_.size());
    }
 private:
    unordered_set<string> stopWords_;
    MPSegment mpSeg_;
    HMMSegment hmmSeg_;
    PosTagger tagger_;
 }; // class MixSegment
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/PinYinTrie.hpp
+++ b/libchinese-segmentation/cppjieba/PinYinTrie.hpp
@ -1,154 +0,0 @@
 #pragma once
 #include <iostream>
 #include <fstream>
 #include <map>
 #include <string>
 #include <cstring>
 #include <cstdlib>
 #include <stdint.h>
 #include <cmath>
 #include <limits>
 #include "limonp/StringUtil.hpp"
 #include "limonp/Logging.hpp"
 #include "Unicode.hpp"
 #include "DatTrie.hpp"
 #include <QDebug>
 namespace cppjieba {
 using namespace limonp;
 // Number of space-separated columns on a single-reading dictionary line
 // (reading followed by word); lines with a different column count are
 // treated as multi-reading entries by PinYinTrie::LoadDefaultPinYin.
 const size_t PINYIN_COLUMN_NUM = 2;
 class PinYinTrie {
 public:
    enum UserWordWeightOption {
        WordWeightMin,
        WordWeightMedian,
        WordWeightMax,
    }; // enum UserWordWeightOption
    PinYinTrie(const string& dict_path, const string & dat_cache_path = "",
             UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
        Init(dict_path, dat_cache_path, user_word_weight_opt);
    }
    ~PinYinTrie() {}
    int getMultiTonResults(string word, QStringList &results) {
        if (qmap_chinese2pinyin.contains(QString::fromStdString(word))) {
            for (auto i:qmap_chinese2pinyin[QString::fromStdString(word)])
                results.push_back(i);
            return 0;
        }
        return -1;
    }
    int getSingleTonResult(string word, QString &result) {
        const PinYinMemElem * tmp = dat_.PinYinFind(word);
        if (tmp) {
            result = QString::fromStdString(tmp->GetTag());
            return 0;
        }
        return -1;
    }
    bool contains(string &word) {
        if (qmap_chinese2pinyin.contains(QString::fromStdString(word))
                or !dat_.PinYinFind(word))
            return true;
 //        if (map_chinese2pinyin.contains(word)
 //                or !dat_.PinYinFind(word))
 //            return true;
        return false;
    }
    bool isMultiTone(const string &word) {
        if (qmap_chinese2pinyin.contains(QString::fromStdString(word)))
            return true;
 //        if (map_chinese2pinyin.contains(word))
 //            return true;
        return false;
    }
    size_t GetTotalDictSize() const {
        return total_dict_size_;
    }
 private:
    void Init(const string& dict_path, string dat_cache_path,
              UserWordWeightOption user_word_weight_opt) {
        size_t file_size_sum = 0;
        vector<PinYinElement> node_infos;
        const string md5 = CalcFileListMD5(dict_path, file_size_sum);
        total_dict_size_ = file_size_sum;
        if (dat_cache_path.empty()) {
            //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) +  ".dat_cache";
        }
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########PinYin path:" << path << file_size_sum;
        if (dat_.InitPinYinAttachDat(dat_cache_path, md5)) {
            //多音字仍需遍历文件信息
            LoadDefaultPinYin(node_infos, dict_path, true);
            return;
        }
        LoadDefaultPinYin(node_infos, dict_path, false);
        double min_weight = 0;
        dat_.SetMinWeight(min_weight);
        const auto build_ret = dat_.InitBuildDat(node_infos, dat_cache_path, md5);
        assert(build_ret);
        vector<PinYinElement>().swap(node_infos);
    }
    void LoadDefaultPinYin(vector<PinYinElement> &node_infos, const string& filePath, bool multiFlag) {
        ifstream ifs(filePath.c_str());
        if(not ifs.is_open()){
            return ;
        }
        XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
        string line;
        vector<string> buf;
        size_t lineno = 0;
        for (; getline(ifs, line); lineno++) {
            if (line.empty()) {
                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
                continue;
            }
            Split(line, buf, " ");
            if (buf.size() == PINYIN_COLUMN_NUM) {
                if (multiFlag) {//非多音字
                    continue;
                }
                PinYinElement node_info;
                node_info.word = buf[1];
                node_info.tag = buf[0];
                node_infos.push_back(node_info);
            } else {//多音字
                QString content = QString::fromUtf8(line.c_str());
                qmap_chinese2pinyin[content.split(" ").last().trimmed()] = content.split(" ");
                qmap_chinese2pinyin[content.split(" ").last().trimmed()].pop_back();
                /*
                 //std map string list
                 list<string> tmpList;
                 for(int i = 0; i < buf.size() - 1; ++i){
                    tmpList.push_back(buf[i]);
                 }
                 map[buf[buf.size() - 1]] = tmpList;
                */
            }
        }
    }
 private:
    QMap<QString, QStringList> qmap_chinese2pinyin;
    //map<string, list<string>> map_chinese2pinyin;
    size_t total_dict_size_ = 0;
    DatTrie dat_;
 };
 }
--- a/libchinese-segmentation/cppjieba/PosTagger.hpp
+++ b/libchinese-segmentation/cppjieba/PosTagger.hpp
@ -1,83 +0,0 @@
 #pragma once
 #include "limonp/StringUtil.hpp"
 #include "DictTrie.hpp"
 #include "SegmentTagged.hpp"
 namespace cppjieba {
 using namespace limonp;
 // Fallback tags returned by PosTagger when the dictionary has no tag for a word:
 // POS_M   - every counted ASCII rune is a digit,
 // POS_ENG - ASCII runes were counted and at least one is not a digit,
 // POS_X   - no ASCII rune was counted, or the word could not be decoded.
 static const char* const POS_M = "m";
 static const char* const POS_ENG = "eng";
 static const char* const POS_X = "x";
 class PosTagger {
 public:
    PosTagger() {
    }
    ~PosTagger() {
    }
    bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
        vector<string> CutRes;
        segment.CutToStr(src, CutRes);
        for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
            res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
        }
        return !res.empty();
    }
    string LookupTag(const string &str, const SegmentTagged& segment) const {
        const DictTrie * dict = segment.GetDictTrie();
        assert(dict != NULL);
        const auto tmp = dict->Find(str);
        if (tmp == NULL || tmp->GetTag().empty()) {
            RuneStrArray runes;
            if (!DecodeRunesInString(str, runes)) {
                XLOG(ERROR) << "Decode failed.";
                return POS_X;
            }
            return SpecialRule(runes);
        } else {
            return tmp->GetTag();
        }
    }
 private:
    const char* SpecialRule(const RuneStrArray& unicode) const {
        size_t m = 0;
        size_t eng = 0;
        for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
            if (unicode[i].rune < 0x80) {
                eng ++;
                if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
                    m++;
                }
            }
        }
        // ascii char is not found
        if (eng == 0) {
            return POS_X;
        }
        // all the ascii is number char
        if (m == eng) {
            return POS_M;
        }
        // the ascii chars contain english letter
        return POS_ENG;
    }
 }; // class PosTagger
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@ -1,126 +0,0 @@
 #pragma once
 #include "limonp/Logging.hpp"
 #include <unordered_set>
 #include "Unicode.hpp"
 namespace cppjieba {
 // Decodes a sentence into runes and hands out consecutive ranges of it,
 // splitting on the ASCII space rune (0x20). The returned ranges are
 // half-open: right points one past the last rune of the range.
 class PreFilter {
 public:
    // symbols is stored but not consulted by the Next overloads below (the
    // separator check against it is commented out in Next()).
    PreFilter(const std::unordered_set<Rune>& symbols,
              const string& sentence)
        : symbols_(symbols) {
        if (!DecodeRunesInString(sentence, sentence_)) {
            XLOG(ERROR) << "decode failed. "<<sentence;
        }
        cursor_ = sentence_.begin();
    }
    ~PreFilter() {
    }
    // True while there are runes left to hand out.
    bool HasNext() const {
        return cursor_ != sentence_.end();
    }
    // Produces the next range: any leading spaces followed by the runes up to
    // (not including) the next space. Returns false once the input is exhausted.
    bool Next(WordRange& wordRange) {
        if (cursor_ == sentence_.end()) {
            return false;
        }
        wordRange.left = cursor_;
        // Test for the end before dereferencing the cursor.
        while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
            cursor_++;
        }
        if (cursor_ == sentence_.end()) {
            wordRange.right = cursor_;
            return true;
        }
        while (++cursor_ != sentence_.end()) {
            if (cursor_->rune == 0x20) {
                wordRange.right = cursor_;
                return true;
            }
        }
        wordRange.right = sentence_.end();
        return true;
    }
    // Produces the next range and reports through isNull whether it consists
    // only of spaces. A non-space range ends at the next space, after 1024
    // runes, or where the UTF-8 byte length of the runes changes, whichever
    // comes first. Returns false once the input is exhausted.
    bool Next(WordRange& wordRange, bool& isNull) {
        isNull = false;
        if (cursor_ == sentence_.end()) {
            return false;
        }
        wordRange.left = cursor_;
        if (cursor_->rune == 0x20) {
            // Consume the whole run of spaces. A run that reaches the end of
            // the sentence is reported as a null range too, instead of falling
            // through and reading the rune at end().
            while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
                cursor_ ++;
            }
            wordRange.right = cursor_;
            isNull = true;
            return true;
        }
        int max_num = 0;
        const uint32_t utf8_num = cursor_->len;
        while (cursor_ != sentence_.end()) {
            if (cursor_->rune == 0x20) {
                wordRange.right = cursor_;
                return true;
            }
            cursor_ ++;
            max_num++;
            // todo: cap a single range at 1024 runes so that one call never
            // hands out too much at once. The end check comes first so that
            // cursor_->len is never read at end().
            if (max_num >= 1024 || cursor_ == sentence_.end() || cursor_->len != utf8_num) {
                wordRange.right = cursor_;
                return true;
            }
        }
        wordRange.right = sentence_.end();
        return true;
    }
    // Produces the next range without a validity flag: either a single space,
    // or the runes up to (not including) the next space.
    WordRange Next() {
        WordRange range(cursor_, cursor_);
        while (cursor_ != sentence_.end()) {
            //if (IsIn(symbols_, cursor_->rune)) {
            if (cursor_->rune == 0x20) {
                // A space at the very start forms a one-rune range of its own.
                if (range.left == cursor_) {
                    cursor_ ++;
                }
                range.right = cursor_;
                return range;
            }
            cursor_ ++;
        }
        range.right = sentence_.end();
        return range;
    }
 private:
    RuneStrArray::const_iterator cursor_;
    RuneStrArray sentence_;
    const std::unordered_set<Rune>& symbols_;
 }; // class PreFilter
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@ -1,83 +0,0 @@
 #pragma once
 #include <algorithm>
 #include <set>
 #include <cassert>
 #include "limonp/Logging.hpp"
 #include "DictTrie.hpp"
 #include "SegmentBase.hpp"
 #include "FullSegment.hpp"
 #include "MixSegment.hpp"
 #include "Unicode.hpp"
 #include "DictTrie.hpp"
 namespace cppjieba {
 class QuerySegment: public SegmentBase {
 public:
    QuerySegment(const DictTrie* dictTrie,
                 const HMMModel* model,
                 const string& stopWordPath)
        : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
    }
    ~QuerySegment() {
    }
    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
                     size_t) const override {
        //use mix Cut first
        vector<WordRange> mixRes;
        mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
        vector<WordRange> fullRes;
        for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
            if (mixResItr->Length() > 2) {
                for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
                    if (trie_->Find(text) != NULL) {
                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
                        res.push_back(wr);
                    }
                }
            }
            if (mixResItr->Length() > 3) {
                for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
                    string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
                    if (trie_->Find(text) != NULL) {
                        WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
                        res.push_back(wr);
                    }
                }
            }
            res.push_back(*mixResItr);
        }
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t) const override {
    }
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t) const override {
    }
 private:
    bool IsAllAscii(const RuneArray& s) const {
        for (size_t i = 0; i < s.size(); i++) {
            if (s[i] >= 0x80) {
                return false;
            }
        }
        return true;
    }
    MixSegment mixSeg_;
    const DictTrie* trie_;
 }; // QuerySegment
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/SegmentBase.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp
@ -1,94 +0,0 @@
 #pragma once
 #include "limonp/Logging.hpp"
 #include "PreFilter.hpp"
 #include <cassert>
 namespace cppjieba {
 // Default separator set installed by the SegmentBase constructor: space, tab,
 // newline, and the UTF-8 encodings of U+FF0C (fullwidth comma) and
 // U+3002 (ideographic full stop).
 const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
 using namespace limonp;
 // Common base of all segmenters. Derived classes implement Cut and the two
 // CutWithSentence overloads; this class provides the sentence-level entry
 // points that split the input with PreFilter and forward each piece.
 class SegmentBase {
 public:
    SegmentBase() {
        XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
    }
    virtual ~SegmentBase() { }
    // Cuts the rune range [begin, end) and appends the resulting word ranges to res.
    virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
                     size_t max_word_len) const = 0;
    // Sentence-based cut methods that produce the final strings / keyword map
    // directly, avoiding intermediate storage and conversions --jxx20210517
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                     size_t max_word_len) const = 0;
    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
                     size_t max_word_len) const = 0;
    // Cuts a whole sentence into strings. words is cleared first.
    // Rewritten to fill vector<string> directly and lower memory use --jxx20210517
    void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
                  size_t max_word_len = MAX_WORD_LENGTH) const {
        PreFilter pre_filter(symbols_, sentence);
        words.clear();
        words.reserve(sentence.size() / 2);// todo: taken from the upstream source, value to be tuned
        // Value-initialise the placeholder iterator: copying an uninitialised
        // iterator into the WordRange would read an indeterminate value.
        RuneStrArray::const_iterator null_p{};
        WordRange range(null_p, null_p);
        while (pre_filter.Next(range)) {
            CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
        }
    }
    // Cuts one already-determined range of sentence into strings (words is appended to).
    void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
                  size_t max_word_len = MAX_WORD_LENGTH) const {
        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
    }
    // Cuts one already-determined range of sentence into the keyword map.
    void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
                  size_t max_word_len = MAX_WORD_LENGTH) const {
        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
    }
    // Cuts a whole sentence into Word objects. words is cleared first.
    void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
                   size_t max_word_len = MAX_WORD_LENGTH) const {
        PreFilter pre_filter(symbols_, sentence);
        vector<WordRange> wrs;
        wrs.reserve(sentence.size() / 2);
        while (pre_filter.HasNext()) {
            auto range = pre_filter.Next();
            Cut(range.left, range.right, wrs, hmm, max_word_len);
        }
        words.clear();
        words.reserve(wrs.size());
        GetWordsFromWordRanges(sentence, wrs, words);
        // wrs is released when it goes out of scope here.
    }
    // Convenience wrapper around Cut with default arguments.
    void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
                      bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
        Cut(begin, end, res, hmm, max_word_len);
    }
    // Replaces the separator set with the runes of s. Returns false (leaving a
    // partially filled set) when s cannot be decoded or contains a rune twice.
    bool ResetSeparators(const string& s) {
        symbols_.clear();
        RuneStrArray runes;
        if (!DecodeRunesInString(s, runes)) {
            XLOG(ERROR) << "decode " << s << " failed";
            return false;
        }
        for (size_t i = 0; i < runes.size(); i++) {
            if (!symbols_.insert(runes[i].rune).second) {
                XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
                return false;
            }
        }
        return true;
    }
 protected:
    unordered_set<Rune> symbols_;
 }; // class SegmentBase
 } // cppjieba
--- a/libchinese-segmentation/cppjieba/SegmentTagged.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentTagged.hpp
@ -1,21 +0,0 @@
 #pragma once
 #include "SegmentBase.hpp"
 namespace cppjieba {
 // Interface of segmenters that can also tag words: adds tagging and access
 // to the underlying dictionary on top of SegmentBase.
 class SegmentTagged : public SegmentBase {
 public:
    SegmentTagged() {}
    virtual ~SegmentTagged() {}
    // Cuts src and appends (word, tag) pairs to res.
    virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
    // Dictionary used by the segmenter.
    virtual const DictTrie* GetDictTrie() const = 0;
 }; // class SegmentTagged
 } // cppjieba
--- a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
@ -1,205 +0,0 @@
 #include <cmath>
 #include "Jieba.hpp"
 namespace cppjieba {
 using namespace limonp;
 using namespace std;
 // Keyword extractor based on the TextRank algorithm: words of a sentence are
 // linked to the words that follow them within a sliding window, the resulting
 // graph is ranked iteratively, and the top-ranked words are returned.
 class TextRankExtractor {
 public:
    typedef struct _Word {
        string word;
        vector<size_t> offsets;
        double weight;
    }    Word; // struct Word
 private:
    typedef std::map<string, Word> WordMap;
    // Undirected weighted graph of words plus the ranking routine.
    class WordGraph {
    private:
        typedef double Score;
        typedef string Node;
        typedef std::set<Node> NodeSet;
        typedef std::map<Node, double> Edges;
        typedef std::map<Node, Edges> Graph;
        //typedef std::unordered_map<Node,double> Edges;
        //typedef std::unordered_map<Node,Edges> Graph;
        double d; // damping factor used by rank()
        Graph graph;
        NodeSet nodeSet;
    public:
        WordGraph(): d(0.85) {};
        WordGraph(double in_d): d(in_d) {};
        // Adds weight to the edge between start and end, in both directions.
        void addEdge(Node start, Node end, double weight) {
            nodeSet.insert(start);
            nodeSet.insert(end);
            graph[start][end] += weight;
            graph[end][start] += weight;
        }
        // Runs rankTime ranking iterations over the graph and writes the
        // normalised scores into ws. Does nothing for an empty graph.
        void rank(WordMap &ws, size_t rankTime = 10) {
            WordMap outSum;
            Score wsdef, min_rank, max_rank;
            if (graph.size() == 0) {
                return;
            }
            wsdef = 1.0 / graph.size();
            for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
                // edges->first: start node; edge->first: end node; edge->second: weight
                ws[edges->first].word = edges->first;
                ws[edges->first].weight = wsdef;
                outSum[edges->first].weight = 0;
                for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
                    outSum[edges->first].weight += edge->second;
                }
            }
            //sort(nodeSet.begin(),nodeSet.end()); is sorting needed?
            for (size_t i = 0; i < rankTime; i++) {
                for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
                    double s = 0;
                    Edges& neighbours = graph[*node];
                    for (Edges::iterator edge = neighbours.begin(); edge != neighbours.end(); edge++) {
                        // edge->first: end node; edge->second: weight
                        s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
                    }
                    ws[*node].weight = (1 - d) + d * s;
                }
            }
            min_rank = max_rank = ws.begin()->second.weight;
            for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
                if (i->second.weight < min_rank) {
                    min_rank = i->second.weight;
                }
                if (i->second.weight > max_rank) {
                    max_rank = i->second.weight;
                }
            }
            for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
                i->second.weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
            }
        }
    };
 public:
    // MixSegment only offers a (dictTrie, model, stopWordPath) constructor, so
    // the stop-word path is forwarded to it as well.
    TextRankExtractor(const DictTrie* dictTrie,
                      const HMMModel* model,
                      const string& stopWordPath)
        : segment_(dictTrie, model, stopWordPath) {
        LoadStopWordDict(stopWordPath);
    }
    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel(), stopWordPath) {
        LoadStopWordDict(stopWordPath);
    }
    ~TextRankExtractor() {
    }
    // Appends the topN keywords of sentence to keywords.
    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
        vector<Word> topWords;
        Extract(sentence, topWords, topN);
        for (size_t i = 0; i < topWords.size(); i++) {
            keywords.push_back(topWords[i].word);
        }
    }
    // Appends the topN (keyword, weight) pairs of sentence to keywords.
    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
        vector<Word> topWords;
        Extract(sentence, topWords, topN);
        for (size_t i = 0; i < topWords.size(); i++) {
            keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
        }
    }
    // Replaces keywords with the topN ranked words of sentence. span is the
    // window of following (non-skipped) words each word is linked to, rankTime
    // the number of ranking iterations. Words reported by IsSingleWord() and
    // stop words are skipped. keywords is left untouched when the cut words do
    // not add up to the sentence length.
    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
        vector<string> words;
        segment_.CutToStr(sentence, words);
        TextRankExtractor::WordGraph graph;
        WordMap wordmap;
        size_t offset = 0;
        for (size_t i = 0; i < words.size(); i++) {
            size_t t = offset;
            offset += words[i].size();
            if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
                continue;
            }
            // skip widens the window by one for every ignored word.
            for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
                if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
                    skip++;
                    continue;
                }
                graph.addEdge(words[i], words[j], 1);
            }
            wordmap[words[i]].offsets.push_back(t);
        }
        if (offset != sentence.size()) {
            XLOG(ERROR) << "words illegal";
            return;
        }
        graph.rank(wordmap, rankTime);
        keywords.clear();
        keywords.reserve(wordmap.size());
        for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
            keywords.push_back(itr->second);
        }
        topN = min(topN, keywords.size());
        partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
        keywords.resize(topN);
    }
 private:
    // Adds every line of filePath to the stop-word set; the file must be readable.
    void LoadStopWordDict(const string& filePath) {
        ifstream ifs(filePath.c_str());
        XCHECK(ifs.is_open()) << "open " << filePath << " failed";
        string line ;
        while (getline(ifs, line)) {
            stopWords_.insert(line);
        }
        assert(stopWords_.size());
    }
    // Orders words by descending weight.
    static bool Compare(const Word &x, const Word &y) {
        return x.weight > y.weight;
    }
    MixSegment segment_;
    unordered_set<string> stopWords_;
 }; // class TextRankExtractor
 // Writes a TextRankExtractor::Word as a JSON-like object with its word,
 // offsets and weight.
 inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
    os << "{\"word\": \"" << word.word;
    os << "\", \"offset\": " << word.offsets;
    os << ", \"weight\": " << word.weight;
    os << "}";
    return os;
 }
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@ -1,264 +0,0 @@
 #pragma once
 #include <stdint.h>
 #include <stdlib.h>
 #include <string>
 #include <vector>
 #include <ostream>
 #include "limonp/LocalVector.hpp"
 #include "limonp/StringUtil.hpp"
 #include "common-struct.h"
 namespace cppjieba {
 using std::string;
 using std::vector;
 typedef uint32_t Rune;
 //struct KeyWord {
 //    string word;
 //    vector<size_t> offsets;
 //    double weight;
 //}; // struct Word
 //struct Word {
 //    string word;
 //    uint32_t offset;
 //    uint32_t unicode_offset;
 //    uint32_t unicode_length;
 //    Word(const string& w, uint32_t o)
 //        : word(w), offset(o) {
 //    }
 //    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
 //        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
 //    }
 //}; // struct Word
 // Writes a Word as a JSON-like object holding its text and its offset.
 inline std::ostream& operator << (std::ostream& os, const Word& w) {
    os << "{\"word\": \"" << w.word;
    os << "\", \"offset\": " << w.offset;
    os << "}";
    return os;
 }
 // One decoded code point together with its position in the source string.
 struct RuneInfo {
    Rune rune = 0;                // decoded code point value
    uint32_t offset = 0;          // byte offset of the code point in the source string
    uint32_t len = 0;             // number of bytes the code point occupies
    uint32_t unicode_offset = 0;  // position as supplied by the decoder
    uint32_t unicode_length = 0;  // length as supplied by the decoder
    // All fields start at zero.
    RuneInfo() {
    }
    // Sets the byte-level fields; the unicode_* fields stay zero.
    RuneInfo(Rune r, uint32_t o, uint32_t l)
        : rune(r), offset(o), len(l) {
    }
    // Sets every field.
    RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
        : rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
    }
 }; // struct RuneInfo
 // Writes a RuneInfo as a JSON-like object holding its rune, byte offset and
 // byte length.
 inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
    os << "{\"rune\": \"" << r.rune;
    os << "\", \"offset\": " << r.offset;
    os << ", \"len\": " << r.len;
    os << "}";
    return os;
 }
 typedef limonp::LocalVector<Rune> RuneArray;
 typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
 // [left, right]
 struct WordRange {
    RuneStrArray::const_iterator left;
    RuneStrArray::const_iterator right;
    WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
        : left(l), right(r) {
    }
    size_t Length() const {
        return right - left;
    }
    bool IsAllAscii() const {
        for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
            if (iter->rune >= 0x80) {
                return false;
            }
        }
        return true;
    }
 }; // struct WordRange
 // Empties arr, then decodes the UTF-8 string s into it through
 // limonp::Utf8ToUnicode32 and returns that call's result.
 inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
    arr.clear();
    const bool decoded = limonp::Utf8ToUnicode32(s, arr);
    return decoded;
 }
 // Convenience overload returning the decoded runes by value. The success
 // flag of the two-argument overload is discarded.
 inline RuneArray DecodeRunesInString(const string& s) {
    RuneArray runes;
    DecodeRunesInString(s, runes);
    return runes;
 }
 //重写DecodeRunesInString函数，将实现放入函数中降低内存占用加快处理流程--jxx20210518
 inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
    uint32_t tmp;
    uint32_t offset = 0;
    runes.clear();
    uint32_t len(0);
    for (size_t i = 0; i < s.size();) {
      if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
        // 7bit, total 7bit
        tmp = (uint8_t)(s.data()[i]) & 0x7f;
        i++;
        len = 1;
      } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
        // 5bit, total 5bit
        tmp = (uint8_t)(s.data()[i]) & 0x1f;
        // 6bit, total 11bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
        i += 2;
        len = 2;
      } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
        // 4bit, total 4bit
        tmp = (uint8_t)(s.data()[i]) & 0x0f;
        // 6bit, total 10bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
        // 6bit, total 16bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
        i += 3;
        len = 3;
      } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
        // 3bit, total 3bit
        tmp = (uint8_t)(s.data()[i]) & 0x07;
        // 6bit, total 9bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
        // 6bit, total 15bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
        // 6bit, total 21bit
        tmp <<= 6;
        tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
        i += 4;
        len = 4;
      } else {
        return false;
      }
      RuneInfo x(tmp, offset, len, i, 1);
      runes.push_back(x);
      offset += len;
    }
    return true;
 }
 // Minimal iterator-like adaptor over a RuneInfo array that yields each
 // element's rune value when dereferenced.
 class RunePtrWrapper {
 public:
    const RuneInfo * m_ptr = nullptr;
 public:
    explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
    // Rune value of the element currently pointed at.
    uint32_t operator *() {
        return (*m_ptr).rune;
    }
    // Advances to the next element.
    // NOTE(review): unlike a conventional post-increment, the returned wrapper
    // refers to the new position, not the previous one.
    RunePtrWrapper operator ++(int) {
        ++m_ptr;
        return *this;
    }
    bool operator !=(const RunePtrWrapper & b) const {
        return !(m_ptr == b.m_ptr);
    }
 };
 // Encodes the runes in [begin, end) through limonp::Unicode32ToUtf8 and
 // returns the resulting string.
 inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
    RunePtrWrapper first(begin);
    RunePtrWrapper last(end);
    string encoded;
    limonp::Unicode32ToUtf8(first, last, encoded);
    return encoded;
 }
 // Same as the returning overload, but hands str to limonp::Unicode32ToUtf8 as
 // the output argument.
 inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
    RunePtrWrapper first(begin);
    RunePtrWrapper last(end);
    limonp::Unicode32ToUtf8(first, last, str);
 }
 // Container stand-in that only counts the values pushed into it, so a decoder
 // writing through push_back can be used to count code points.
 class Unicode32Counter {
 public :
    size_t length = 0;
    // Counts one more value; the value itself is ignored.
    void push_back(uint32_t) {
        length += 1;
    }
    // Resets the count to zero.
    void clear() {
        length = 0;
    }
 };
 inline size_t Utf8CharNum(const char * str, size_t length) {
    Unicode32Counter c;
    if (limonp::Utf8ToUnicode32(str, length, c)) {
        return c.length;
    }
    return 0;
 }
 inline size_t Utf8CharNum(const string & str) {
    return Utf8CharNum(str.data(), str.size());
 }
 // True when Utf8CharNum reports exactly one character for str.
 inline bool IsSingleWord(const string& str) {
    const size_t charCount = Utf8CharNum(str);
    return 1 == charCount;
 }
 // [left, right]
 // Builds the Word covering the runes left through right (both included):
 // its text is the matching byte slice of s, and its offsets/lengths are
 // derived from the two RuneInfo records.
 inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
    assert(right->offset >= left->offset);
    const uint32_t byte_begin = left->offset;
    const uint32_t byte_count = right->offset - byte_begin + right->len;
    const uint32_t unicode_begin = left->unicode_offset;
    const uint32_t unicode_count = right->unicode_offset - unicode_begin + right->unicode_length;
    return Word(s.substr(byte_begin, byte_count), byte_begin, unicode_begin, unicode_count);
 }
 // Returns the byte slice of s spanning the runes left through right (both
 // included).
 inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
    assert(right->offset >= left->offset);
    const uint32_t first_byte = left->offset;
    const uint32_t byte_count = right->offset - first_byte + right->len;
    return s.substr(first_byte, byte_count);
 }
 inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
    for (size_t i = 0; i < wrs.size(); i++) {
        words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
    }
 }
 inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
    for (size_t i = 0; i < wrs.size(); i++) {
        words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
    }
 }
 inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
    strs.resize(words.size());
    for (size_t i = 0; i < words.size(); ++i) {
        strs[i] = words[i].word;
    }
 }
 // Length limit constant; its consumers are outside this header section.
 // NOTE(review): the unit (bytes vs. runes) is not visible here -- confirm at the use sites.
 const size_t MAX_WORD_LENGTH = 512;
 } // namespace cppjieba
--- a/libchinese-segmentation/cppjieba/cppjieba.pri
+++ b/libchinese-segmentation/cppjieba/cppjieba.pri
@ -1,22 +0,0 @@
 INCLUDEPATH += $$PWD
 HEADERS += \
    $$PWD/DictTrie.hpp \
    $$PWD/IdfTrie.hpp \
    $$PWD/PinYinTrie.hpp \
    $$PWD/FullSegment.hpp \
    $$PWD/HMMModel.hpp \
    $$PWD/HMMSegment.hpp \
    $$PWD/Jieba.hpp \
    $$PWD/KeywordExtractor.hpp \
    $$PWD/MPSegment.hpp \
    $$PWD/MixSegment.hpp \
    $$PWD/PosTagger.hpp \
    $$PWD/PreFilter.hpp \
    $$PWD/QuerySegment.hpp \
    $$PWD/SegmentBase.hpp \
    $$PWD/SegmentTagged.hpp \
    $$PWD/TextRankExtractor.hpp \
    $$PWD/Trie.hpp \
    $$PWD/Unicode.hpp
 include(limonp/limonp.pri)
--- a/libchinese-segmentation/cppjieba/darts.h
+++ b/libchinese-segmentation/cppjieba/darts.h
--- a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
@ -1,70 +0,0 @@
 /************************************
 * file enc : ascii
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_ARGV_FUNCTS_H
 #define LIMONP_ARGV_FUNCTS_H
 #include <set>
 #include <sstream>
 #include "StringUtil.hpp"
 namespace limonp {
 using namespace std;
 // Sorts command-line entries into three groups:
 //   - "-key value" pairs (a dash-prefixed entry followed by a non-dash entry),
 //   - lone dash-prefixed flags,
 //   - everything else, kept in order as positional arguments.
 class ArgvContext {
 public :
  ArgvContext(int argc, const char* const * argv) {
    for(int i = 0; i < argc; i++) {
      const char* const current = argv[i];
      if(!StartsWith(current, "-")) {
        args_.push_back(current);
        continue;
      }
      const bool hasValue = (i + 1 < argc) && !StartsWith(argv[i + 1], "-");
      if(hasValue) {
        mpss_[current] = argv[i + 1];
        i++; // the value was consumed together with its key
      } else {
        sset_.insert(current);
      }
    }
  }
  ~ArgvContext() {
  }
  friend ostream& operator << (ostream& os, const ArgvContext& args);
  // Positional argument i, or an empty string when i is out of range.
  string operator [](size_t i) const {
    if(i >= args_.size()) {
      return "";
    }
    return args_[i];
  }
  // Value stored for key, or an empty string when key has no value.
  string operator [](const string& key) const {
    map<string, string>::const_iterator found = mpss_.find(key);
    if(found == mpss_.end()) {
      return "";
    }
    return found->second;
  }
  // True when key was seen either with a value or as a lone flag.
  bool HasKey(const string& key) const {
    const bool withValue = mpss_.find(key) != mpss_.end();
    return withValue || sset_.find(key) != sset_.end();
  }
 private:
  vector<string> args_;
  map<string, string> mpss_;
  set<string> sset_;
 }; // class ArgvContext
 // Streams the positional arguments, the key/value map and the flag set, in
 // that order, using the container stream operators in scope.
 inline ostream& operator << (ostream& os, const ArgvContext& args) {
  os << args.args_;
  os << args.mpss_;
  os << args.sset_;
  return os;
 }
 } // namespace limonp
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
@ -1,49 +0,0 @@
 #ifndef LIMONP_BLOCKINGQUEUE_HPP
 #define LIMONP_BLOCKINGQUEUE_HPP
 #include <queue>
 #include "Condition.hpp"
 namespace limonp {
 // Unbounded FIFO guarded by a mutex; Pop() blocks while the queue is empty.
 template<class T>
 class BlockingQueue: NonCopyable {
 public:
  BlockingQueue()
    : mutex_(), notEmpty_(mutex_), queue_() {
  }
  // Appends a copy of x and signals one waiter. The signal is sent while the
  // lock is still held (original author's note: wait morphing saves us).
  void Push(const T& x) {
    MutexLockGuard guard(mutex_);
    queue_.push(x);
    notEmpty_.Notify();
  }
  // Removes and returns the oldest element, waiting until one is available.
  T Pop() {
    MutexLockGuard guard(mutex_);
    // Re-check the condition after every wake-up: wake-ups may be spurious.
    for (; queue_.empty(); ) {
      notEmpty_.Wait();
    }
    assert(!queue_.empty());
    T head(queue_.front());
    queue_.pop();
    return head;
  }
  // Element count, read under the lock.
  size_t Size() const {
    MutexLockGuard guard(mutex_);
    return queue_.size();
  }
  bool Empty() const {
    return 0 == Size();
  }
 private:
  mutable MutexLock mutex_;
  Condition         notEmpty_;
  std::queue<T>     queue_;
 }; // class BlockingQueue
 } // namespace limonp
 #endif // LIMONP_BLOCKINGQUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
@ -1,67 +0,0 @@
 #ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
 #define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
 #include "BoundedQueue.hpp"
 namespace limonp {
 // Fixed-capacity FIFO guarded by a mutex: Push() blocks while the queue is
 // full and Pop() blocks while it is empty.
 template<typename T>
 class BoundedBlockingQueue : NonCopyable {
 public:
  // maxSize is forwarded to the underlying BoundedQueue as its capacity.
  explicit BoundedBlockingQueue(size_t maxSize)
    : mutex_(),
      notEmpty_(mutex_),
      notFull_(mutex_),
      queue_(maxSize) {
  }
  // Appends a copy of x, waiting for free space first, then signals a reader.
  void Push(const T& x) {
    MutexLockGuard lock(mutex_);
    // Loop rather than `if`: the condition is re-checked after every wake-up.
    while (queue_.Full()) {
      notFull_.Wait();
    }
    assert(!queue_.Full());
    queue_.Push(x);
    notEmpty_.Notify();
  }
  // Removes and returns the oldest element, waiting until one is available,
  // then signals a writer.
  T Pop() {
    MutexLockGuard lock(mutex_);
    while (queue_.Empty()) {
      notEmpty_.Wait();
    }
    assert(!queue_.Empty());
    T res = queue_.Pop();
    notFull_.Notify();
    return res;
  }
  bool Empty() const {
    MutexLockGuard lock(mutex_);
    return queue_.Empty();
  }
  bool Full() const {
    MutexLockGuard lock(mutex_);
    return queue_.Full();
  }
  // Current element count, read under the lock.
  size_t size() const {
    MutexLockGuard lock(mutex_);
    // BoundedQueue spells its accessors Size()/Capacity(); the lower-case
    // names do not exist there and failed to compile once instantiated.
    return queue_.Size();
  }
  // Capacity fixed at construction; read without locking.
  size_t capacity() const {
    return queue_.Capacity();
  }
 private:
  mutable MutexLock          mutex_;
  Condition                  notEmpty_;
  Condition                  notFull_;
  BoundedQueue<T>  queue_;
 }; // class BoundedBlockingQueue
 } // namespace limonp
 #endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
@ -1,65 +0,0 @@
 #ifndef LIMONP_BOUNDED_QUEUE_HPP
 #define LIMONP_BOUNDED_QUEUE_HPP
 #include <vector>
 #include <fstream>
 #include <cassert>
 namespace limonp {
 using namespace std;
 // Fixed-capacity FIFO stored in a circular buffer. Not synchronized; Push on a
 // full queue and Pop on an empty one are guarded by assert only.
 template<class T>
 class BoundedQueue {
 public:
  // Allocates `capacity` slots up front; capacity must be non-zero.
  explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
    Clear();
    assert(capacity_);
  }
  ~BoundedQueue() {
  }
  // Forgets all queued elements by resetting the indices; the slots themselves
  // are left untouched.
  void Clear() {
    size_ = 0;
    tail_ = 0;
    head_ = 0;
  }
  bool Empty() const {
    return size_ == 0;
  }
  bool Full() const {
    return size_ == capacity_;
  }
  size_t Size() const {
    return size_;
  }
  size_t Capacity() const {
    return capacity_;
  }
  // Stores a copy of t in the slot at tail_ and advances tail_ with wrap-around.
  void Push(const T& t) {
    assert(!Full());
    const size_t slot = tail_;
    tail_ = (slot + 1) % capacity_;
    circular_buffer_[slot] = t;
    ++size_;
  }
  // Returns a copy of the element at head_ and advances head_ with wrap-around.
  T Pop() {
    assert(!Empty());
    const size_t slot = head_;
    head_ = (slot + 1) % capacity_;
    --size_;
    return circular_buffer_[slot];
  }
 private:
  size_t head_;
  size_t tail_;
  size_t size_;
  const size_t capacity_;
  vector<T> circular_buffer_;
 }; // class BoundedQueue
 } // namespace limonp
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/Closure.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Closure.hpp
@ -1,206 +0,0 @@
 #ifndef LIMONP_CLOSURE_HPP
 #define LIMONP_CLOSURE_HPP
 namespace limonp {
 // Abstract unit of deferred work: implementations bind a callable (and its
 // arguments) and execute it in Run().
 class ClosureInterface {
 public:
  // Executes the bound work.
  virtual void Run() = 0;
  // Virtual so closures can be destroyed through a ClosureInterface pointer.
  virtual ~ClosureInterface() {
  }
 };
 // Closure over a nullary callable held by value; Run() invokes (*fun_)().
 template <class Funct>
 class Closure0: public ClosureInterface {
 public:
  // The member is initialized directly from the argument instead of being
  // default-constructed and then assigned, so Funct only has to be copyable.
  Closure0(Funct fun): fun_(fun) {
  }
  virtual ~Closure0() {
  }
  virtual void Run() {
    (*fun_)();
  }
 private:
  Funct fun_;
 }; 
 // Closure over a unary callable and one stored argument; Run() invokes
 // (*fun_)(arg1_).
 template <class Funct, class Arg1>
 class Closure1: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so the types only need to be copyable.
  Closure1(Funct fun, Arg1 arg1): fun_(fun), arg1_(arg1) {
  }
  virtual ~Closure1() {
  }
  virtual void Run() {
    (*fun_)(arg1_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
 }; 
 // Closure over a binary callable and two stored arguments; Run() invokes
 // (*fun_)(arg1_, arg2_).
 template <class Funct, class Arg1, class Arg2>
 class Closure2: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so the types only need to be copyable.
  Closure2(Funct fun, Arg1 arg1, Arg2 arg2): fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~Closure2() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
 }; 
 // Closure over a ternary callable and three stored arguments; Run() invokes
 // (*fun_)(arg1_, arg2_, arg3_).
 template <class Funct, class Arg1, class Arg2, class Arg3>
 class Closure3: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so the types only need to be copyable.
  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~Closure3() {
  }
  virtual void Run() {
    (*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
 }; 
 // Closure over an object pointer and a nullary member function; Run() invokes
 // (p_->*fun_)(). The object is not owned and must outlive the closure's use.
 template <class Obj, class Funct> 
 class ObjClosure0: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // left uninitialized/default-constructed and then assigned.
  ObjClosure0(Obj* p, Funct fun): p_(p), fun_(fun) {
  }
  virtual ~ObjClosure0() {
  }
  virtual void Run() {
    (p_->*fun_)();
  }
 private:
  Obj* p_;
  Funct fun_;
 }; 
 // Closure over an object pointer, a unary member function and one stored
 // argument; Run() invokes (p_->*fun_)(arg1_). The object is not owned.
 template <class Obj, class Funct, class Arg1> 
 class ObjClosure1: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so Arg1 only needs to be copyable.
  ObjClosure1(Obj* p, Funct fun, Arg1 arg1): p_(p), fun_(fun), arg1_(arg1) {
  }
  virtual ~ObjClosure1() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
 }; 
 // Closure over an object pointer, a binary member function and two stored
 // arguments; Run() invokes (p_->*fun_)(arg1_, arg2_). The object is not owned.
 template <class Obj, class Funct, class Arg1, class Arg2> 
 class ObjClosure2: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so the types only need to be copyable.
  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2) {
  }
  virtual ~ObjClosure2() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
 }; 
 // Closure over an object pointer, a ternary member function and three stored
 // arguments; Run() invokes (p_->*fun_)(arg1_, arg2_, arg3_). The object is
 // not owned.
 template <class Obj, class Funct, class Arg1, class Arg2, class Arg3> 
 class ObjClosure3: public ClosureInterface {
 public:
  // Members are initialized directly from the arguments instead of being
  // default-constructed and then assigned, so the types only need to be copyable.
  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
  }
  virtual ~ObjClosure3() {
  }
  virtual void Run() {
    (p_->*fun_)(arg1_, arg2_, arg3_);
  }
 private:
  Obj* p_;
  Funct fun_;
  Arg1 arg1_;
  Arg2 arg2_;
  Arg3 arg3_;
 }; 
 // Wraps a nullary free function in a closure allocated with new; nothing in
 // this function releases it.
 template<class R>
 ClosureInterface* NewClosure(R (*fun)()) {
  typedef R (*FunctionType)();
  ClosureInterface* closure = new Closure0<FunctionType>(fun);
  return closure;
 }
 // Wraps a unary free function and its argument in a closure allocated with
 // new; nothing in this function releases it.
 template<class R, class Arg1>
 ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
  typedef R (*FunctionType)(Arg1);
  ClosureInterface* closure = new Closure1<FunctionType, Arg1>(fun, arg1);
  return closure;
 }
 // Wraps a binary free function and its arguments in a closure allocated with
 // new; nothing in this function releases it.
 template<class R, class Arg1, class Arg2>
 ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  typedef R (*FunctionType)(Arg1, Arg2);
  ClosureInterface* closure = new Closure2<FunctionType, Arg1, Arg2>(fun, arg1, arg2);
  return closure;
 }
 // Wraps a ternary free function and its arguments in a closure allocated with
 // new; nothing in this function releases it.
 template<class R, class Arg1, class Arg2, class Arg3>
 ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  typedef R (*FunctionType)(Arg1, Arg2, Arg3);
  ClosureInterface* closure = new Closure3<FunctionType, Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
  return closure;
 }
 // Wraps obj and a nullary member function in a closure allocated with new;
 // nothing in this function releases it. obj is stored as a raw pointer.
 template<class R, class Obj>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
  typedef R (Obj::* MethodType)();
  ClosureInterface* closure = new ObjClosure0<Obj, MethodType>(obj, fun);
  return closure;
 }
 // Wraps obj, a unary member function and its argument in a closure allocated
 // with new; nothing in this function releases it. obj is stored as a raw pointer.
 template<class R, class Obj, class Arg1>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
  typedef R (Obj::* MethodType)(Arg1);
  ClosureInterface* closure = new ObjClosure1<Obj, MethodType, Arg1>(obj, fun, arg1);
  return closure;
 }
 // Wraps obj, a binary member function and its arguments in a closure allocated
 // with new; nothing in this function releases it. obj is stored as a raw pointer.
 template<class R, class Obj, class Arg1, class Arg2>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
  typedef R (Obj::* MethodType)(Arg1, Arg2);
  ClosureInterface* closure = new ObjClosure2<Obj, MethodType, Arg1, Arg2>(obj, fun, arg1, arg2);
  return closure;
 }
 // Wraps obj, a ternary member function and its arguments in a closure allocated
 // with new; nothing in this function releases it. obj is stored as a raw pointer.
 template<class R, class Obj, class Arg1, class Arg2, class Arg3>
 ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
  typedef R (Obj::* MethodType)(Arg1, Arg2, Arg3);
  ClosureInterface* closure = new ObjClosure3<Obj, MethodType, Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
  return closure;
 }
 } // namespace limonp
 #endif // LIMONP_CLOSURE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Colors.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Colors.hpp
@ -1,31 +0,0 @@
 #ifndef LIMONP_COLOR_PRINT_HPP
 #define LIMONP_COLOR_PRINT_HPP
 #include <string>
 #include <stdarg.h>
 namespace limonp {
 using std::string;
 // Color codes; ColorPrintln inserts the numeric value into the
 // "\033[0;%dm" escape sequence.
 enum Color {
  BLACK  = 30,
  RED    = 31,
  GREEN  = 32,
  YELLOW = 33,
  BLUE   = 34,
  PURPLE = 35
 }; // enum Color
 // printf-style output to stdout in the given color, followed by a color reset
 // and a newline.
 static void ColorPrintln(enum Color color, const char * fmt, ...) {
  printf("\033[0;%dm", color);
  va_list args;
  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
  // The reset is sent together with a newline: without the newline, following
  // lines could in some situations unexpectedly keep the color.
  printf("\033[0m\n");
 }
 } // namespace limonp
 #endif // LIMONP_COLOR_PRINT_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Condition.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Condition.hpp
@ -1,38 +0,0 @@
 #ifndef LIMONP_CONDITION_HPP
 #define LIMONP_CONDITION_HPP
 #include "MutexLock.hpp"
 namespace limonp {
 // Thin wrapper around a pthread condition variable bound to one MutexLock.
 // Every pthread call is wrapped in XCHECK, so a non-zero return code is
 // logged at FATAL level with the failing expression text.
 class Condition : NonCopyable {
 public:
  // Keeps a reference to mutex (it must outlive this object) and initializes
  // the condition variable with default attributes.
  explicit Condition(MutexLock& mutex)
    : mutex_(mutex) {
    XCHECK(!pthread_cond_init(&pcond_, NULL));
  }
  ~Condition() {
    XCHECK(!pthread_cond_destroy(&pcond_));
  }
  // Waits on the condition using the bound mutex's underlying pthread mutex.
  // NOTE(review): pthread_cond_wait requires the caller to hold that mutex -- confirm at call sites.
  void Wait() {
    XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
  }
  // Signals the condition (pthread_cond_signal).
  void Notify() {
    XCHECK(!pthread_cond_signal(&pcond_));
  }
  // Broadcasts the condition (pthread_cond_broadcast).
  void NotifyAll() {
    XCHECK(!pthread_cond_broadcast(&pcond_));
  }
 private:
  MutexLock& mutex_;
  pthread_cond_t pcond_;
 }; // class Condition
 } // namespace limonp
 #endif // LIMONP_CONDITION_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Config.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Config.hpp
@ -1,103 +0,0 @@
 /************************************
 * file enc : utf8
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_CONFIG_H
 #define LIMONP_CONFIG_H
 #include <map>
 #include <fstream>
 #include <iostream>
 #include <assert.h>
 #include "StringUtil.hpp"
 namespace limonp {
 using namespace std;
 // Read-only key/value settings loaded once, at construction, from a text file.
 class Config {
 public:
  // Loads filePath immediately; see LoadFile for how lines are interpreted.
  explicit Config(const string& filePath) {
    LoadFile(filePath);
  }
  // True when at least one entry was loaded.
  operator bool () {
    return !map_.empty();
  }
  // Value stored for key, or defaultvalue when key is absent.
  string Get(const string& key, const string& defaultvalue) const {
    map<string, string>::const_iterator it = map_.find(key);
    if(map_.end() != it) {
      return it->second;
    }
    return defaultvalue;
  }
  // Integer form of Get: defaultvalue when key is absent or its value is the
  // empty string, otherwise atoi() of the stored text.
  int Get(const string& key, int defaultvalue) const {
    string str = Get(key, "");
    if("" == str) {
      return defaultvalue;
    }
    return atoi(str.c_str());
  }
  // C-string lookup: NULL when key is NULL or absent. The returned pointer
  // refers to the string stored inside map_.
  const char* operator [] (const char* key) const {
    if(NULL == key) {
      return NULL;
    }
    map<string, string>::const_iterator it = map_.find(key);
    if(map_.end() != it) {
      return it->second.c_str();
    }
    return NULL;
  }
  // Textual dump of the whole configuration.
  // NOTE(review): relies on a `string << T` operator that is not defined in this section -- presumably from StringUtil.hpp.
  string GetConfigInfo() const {
    string res;
    res << *this;
    return res;
  }
 private:
  // Parses filePath line by line. Lines that are empty after Trim or start
  // with "#" are skipped. Every other line is passed to Split with "=" and
  // must yield exactly two parts, which are trimmed and stored as key and
  // value. A malformed line or a repeated key is reported on stderr and trips
  // an assert; with asserts compiled out the line is skipped instead.
  void LoadFile(const string& filePath) {
    ifstream ifs(filePath.c_str());
    assert(ifs);
    string line;
    vector<string> vecBuf;
    size_t lineno = 0;
    while(getline(ifs, line)) {
      lineno ++;
      Trim(line);
      if(line.empty() || StartsWith(line, "#")) {
        continue;
      }
      vecBuf.clear();
      Split(line, vecBuf, "=");
      if(2 != vecBuf.size()) {
        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
        assert(false);
        continue;
      }
      string& key = vecBuf[0];
      string& value = vecBuf[1];
      Trim(key);
      Trim(value);
      // insert() leaves an existing entry untouched, so the first occurrence of a key wins.
      if(!map_.insert(make_pair(key, value)).second) {
        fprintf(stderr, "key[%s] already exits.\n", key.c_str());
        assert(false);
        continue;
      }
    }
    ifs.close();
  }
  friend ostream& operator << (ostream& os, const Config& config);
  map<string, string> map_;
 }; // class Config
 // Streams the configuration's key/value map using the map stream operator in
 // scope.
 inline ostream& operator << (ostream& os, const Config& config) {
  os << config.map_;
  return os;
 }
 } // namespace limonp
 #endif // LIMONP_CONFIG_H
--- a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
@ -1,74 +0,0 @@
 #ifndef LIMONP_FILELOCK_HPP
 #define LIMONP_FILELOCK_HPP
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <string>
 #include <string.h>
 #include <assert.h>
 namespace limonp {
 using std::string;
 // Advisory whole-file write lock built on fcntl(F_SETLK).
 //
 // Usage: Open() a lock file, then Lock()/UnLock() as needed. Failures do not
 // throw: they clear Ok() and store the strerror text returned by Error().
 // The descriptor is closed by Close() or by the destructor, whichever comes
 // first. Copying a FileLock would make two objects share one descriptor, so
 // instances should not be copied.
 class FileLock {
 public:
  FileLock() : fd_(-1), ok_(true) {
  }
  ~FileLock() {
    Close();
  }
  // Opens fname for read/write, creating it with mode 0644 when missing.
  // Must only be called while no file is open.
  void Open(const std::string& fname) {
    assert(fd_ == -1);
    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd_ < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }
  // Closes the descriptor if one is open. fd_ is reset afterwards so that a
  // second Close() or the destructor does not close the same number again
  // (it may have been reused by then) and so that Open() can be called anew.
  // Any descriptor >= 0 counts as open, including 0.
  void Close() {
    if(fd_ < 0) {
      return;
    }
    ::close(fd_);
    fd_ = -1;
  }
  // Tries to take the write lock without blocking (F_SETLK).
  void Lock() {
    if(LockOrUnlock(fd_, true) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }
  // Releases the lock.
  void UnLock() {
    if(LockOrUnlock(fd_, false) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }
  // False once any operation has failed; the flag is never reset.
  bool Ok() const {
    return ok_;
  }
  // strerror text of the most recent failure, empty if none happened.
  std::string Error() const {
    return err_;
  }
 private:
  // Sets (lock == true) or clears a write lock covering the entire file.
  // Returns the fcntl result: negative on failure with errno set.
  static int LockOrUnlock(int fd, bool lock) {
    errno = 0;
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (lock ? F_WRLCK : F_UNLCK);
    f.l_whence = SEEK_SET;
    f.l_start = 0;
    f.l_len = 0;        // Lock/unlock entire file
    return fcntl(fd, F_SETLK, &f);
  }
  int fd_;
  bool ok_;
  std::string err_;
 }; // class FileLock
 }// namespace limonp
 #endif // LIMONP_FILELOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
@ -1,7 +0,0 @@
 #ifndef LIMONP_FORCE_PUBLIC_H
 #define LIMONP_FORCE_PUBLIC_H
 // Redefines the `private` and `protected` keywords as `public` for everything
 // preprocessed after this header is included in a translation unit.
 #define private public
 #define protected public
 #endif // LIMONP_FORCE_PUBLIC_H
--- a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
@ -1,142 +0,0 @@
 #ifndef LIMONP_LOCAL_VECTOR_HPP
 #define LIMONP_LOCAL_VECTOR_HPP
 #include <iostream>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 namespace limonp {
 using namespace std;
 /*
 * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
 * LocalVector<T> is simple and not well-tested.
 */
 // Number of elements stored inline before LocalVector switches to the heap.
 const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
 // Small-buffer vector: the first LOCAL_VECTOR_BUFFER_SIZE elements live in an
 // inline array, larger contents in malloc'ed storage. Elements are moved with
 // memcpy and never destroyed individually, so T must be safe to copy
 // byte-wise. Allocation failure is only checked with assert.
 template <class T>
 class LocalVector {
 public:
  typedef const T* const_iterator ;
  typedef T value_type;
  typedef size_t size_type;
 private:
  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
  T * ptr_;          // buffer_ while inline, otherwise the heap block
  size_t size_;
  size_t capacity_;
 public:
  LocalVector() {
    init_();
  }
  LocalVector(const LocalVector<T>& vec) {
    init_();
    *this = vec;
  }
  LocalVector(const_iterator  begin, const_iterator end) { // TODO: make it faster
    init_();
    while(begin != end) {
      push_back(*begin++);
    }
  }
  // Creates a vector of `size` copies of t.
  LocalVector(size_t size, const T& t) { // TODO: make it faster
    init_();
    while(size--) {
      push_back(t);
    }
  }
  ~LocalVector() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
  }
 public:
  // Replaces the content with a byte-wise copy of vec, mirroring vec's
  // capacity and its inline/heap placement.
  LocalVector<T>& operator = (const LocalVector<T>& vec) {
    if(this == &vec) {
      return *this;
    }
    clear();
    size_ = vec.size();
    capacity_ = vec.capacity();
    if(vec.buffer_ == vec.ptr_) {
      memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
      ptr_ = buffer_;
    } else {
      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
      assert(ptr_);
      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
    }
    return *this;
  }
 private:
  // Resets to the empty, inline state without releasing anything.
  void init_() {
    ptr_ = buffer_;
    size_ = 0;
    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
  }
 public:
  // Unchecked element access.
  T& operator [] (size_t i) {
    return ptr_[i];
  }
  const T& operator [] (size_t i) const {
    return ptr_[i];
  }
  // Appends a copy of t, doubling the capacity when the storage is full.
  void push_back(const T& t) {
    if(size_ == capacity_) {
      assert(capacity_);
      // t may refer to an element of this vector (e.g. v.push_back(v[0])).
      // reserve() releases the old storage, so the value is copied first.
      const T value = t;
      reserve(capacity_ * 2);
      ptr_[size_ ++ ] = value;
      return;
    }
    ptr_[size_ ++ ] = t;
  }
  // Grows the capacity to at least `size`; never shrinks. Existing storage is
  // copied byte-wise into the new heap block.
  void reserve(size_t size) {
    if(size <= capacity_) {
      return;
    }
    T * next =  (T*)malloc(sizeof(T) * size);
    assert(next);
    T * old = ptr_;
    ptr_ = next;
    memcpy(ptr_, old, sizeof(T) * capacity_);
    capacity_ = size;
    if(old != buffer_) {
      free(old);
    }
  }
  bool empty() const {
    return 0 == size();
  }
  size_t size() const {
    return size_;
  }
  size_t capacity() const {
    return capacity_;
  }
  const_iterator begin() const {
    return ptr_;
  }
  const_iterator end() const {
    return ptr_ + size_;
  }
  // Releases any heap block and returns to the empty, inline state.
  void clear() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
    init_();
  }
 };
 // Streams the elements as a bracketed list of double-quoted values separated
 // by ", "; an empty vector prints as "[]".
 template <class T>
 ostream & operator << (ostream& os, const LocalVector<T>& vec) {
  const size_t count = vec.size();
  if(count == 0) {
    os << "[]";
    return os;
  }
  os << "[\"";
  for(size_t i = 0; i < count; i++) {
    if(i != 0) {
      os << "\", \"";
    }
    os << vec[i];
  }
  os << "\"]";
  return os;
 }
 }
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/Logging.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Logging.hpp
@ -1,77 +0,0 @@
 #ifndef LIMONP_LOGGING_HPP
 #define LIMONP_LOGGING_HPP
 #include <sstream>
 #include <iostream>
 #include <cassert>
 #include <cstdlib>
 #include <ctime>
 #ifdef XLOG
 #error "XLOG has been defined already"
 #endif // XLOG
 #ifdef XCHECK
 #error "XCHECK has been defined already"
 #endif // XCHECK
 // XLOG(level) yields the output stream of a temporary limonp::Logger tagged
 // with the given level (DEBUG, INFO, WARNING, ERROR or FATAL) and the current
 // file and line. The message is emitted when the temporary is destroyed at
 // the end of the full expression.
 #define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
 // XCHECK(exp) logs at FATAL level, including the text of exp, when exp is
 // false; further text can be streamed after it with <<.
 // Written as if/else so that the macro brings its own `else`: an `else`
 // following `if (c) XCHECK(x);` in user code then pairs with the user's `if`
 // rather than with the one hidden inside the macro.
 #define XCHECK(exp) if(!!(exp)) {} else XLOG(FATAL) << "exp: ["#exp << "] false. "
 namespace limonp {
 // Log severities in ascending order, numbered consecutively from 0.
 enum {
  LL_DEBUG = 0,
  LL_INFO,
  LL_WARNING,
  LL_ERROR,
  LL_FATAL,
 }; // enum
 // Display name for each log level, indexed by the LL_* value (LL_DEBUG = 0 ... LL_FATAL = 4).
 static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
 // One log record. The constructor writes the prefix
 // "<local time> <file>:<line> <LEVEL> " into an internal buffer, callers
 // append the message through Stream(), and the destructor prints the whole
 // record to std::cerr. A record at LL_FATAL aborts the process after being
 // printed. When LOGGING_LEVEL is defined, records below it produce no output.
 class Logger {
 public:
  // level must be one of the LL_* values; filename and lineno identify the
  // call site.
  Logger(size_t level, const char* filename, int lineno)
   : level_(level) {
 #ifdef LOGGING_LEVEL
     if (level_ < LOGGING_LEVEL) {
       return;
     }
 #endif
    // level_ is used as an index into LOG_LEVEL_ARRAY below, so it has to be
    // strictly smaller than the number of entries.
    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
    char buf[32];
    time_t now;
    time(&now);
    struct tm result;
    localtime_r(&now, &result);
    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
    stream_ << buf
      << " " << filename
      << ":" << lineno
      << " " << LOG_LEVEL_ARRAY[level_]
      << " ";
  }
  // Emits the buffered record and aborts on LL_FATAL.
  ~Logger() {
 #ifdef LOGGING_LEVEL
     if (level_ < LOGGING_LEVEL) {
       return;
     }
 #endif
    std::cerr << stream_.str() << std::endl;
    if (level_ == LL_FATAL) {
      abort();
    }
  }
  // Stream the message text is appended to.
  std::ostream& Stream() {
    return stream_;
  }
 private:
  std::ostringstream stream_;
  size_t level_;
 }; // class Logger
 } // namespace limonp
 #endif // LIMONP_LOGGING_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Md5.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Md5.hpp
@ -1,415 +0,0 @@
 /****************************************************************************
 **Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991
 **              2020, KylinSoft Co., Ltd.
 **All rights reserved.
 **
 **License to copy and use this software is granted provided that it
 **is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 **Algorithm" in all material mentioning or referencing this software
 **or this function.
 **
 **License is also granted to make and use derivative works provided
 **that such works are identified as "derived from the RSA Data
 **Security, Inc. MD5 Message-Digest Algorithm" in all material
 **mentioning or referencing the derived work.
 **
 **RSA Data Security, Inc. makes no representations concerning either
 **the merchantability of this software or the suitability of this
 **software for any particular purpose. It is provided "as is"
 **without express or implied warranty of any kind.
 **
 **These notices must be retained in any copies of any part of this
 **documentation and/or software.
 **
 **
 **
 **The original md5 implementation avoids external libraries.
 **This version has dependency on stdio.h for file input and
 **string.h for memcpy.
 **
 ****************************************************************************/
 #ifndef __MD5_H__
 #define __MD5_H__
 #include <cstdio>
 #include <cstring>
 #include <iostream>
 namespace limonp {
 //#pragma region MD5 defines
 // Constants for MD5Transform routine.
 #define S11 7
 #define S12 12
 #define S13 17
 #define S14 22
 #define S21 5
 #define S22 9
 #define S23 14
 #define S24 20
 #define S31 4
 #define S32 11
 #define S33 16
 #define S34 23
 #define S41 6
 #define S42 10
 #define S43 15
 #define S44 21
 // F, G, H and I are basic MD5 functions.
 #define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
 #define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
 #define H(x, y, z) ((x) ^ (y) ^ (z))
 #define I(x, y, z) ((y) ^ ((x) | (~z)))
 // ROTATE_LEFT rotates x left n bits.
 #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
 // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 // Rotation is separate from addition to prevent recomputation.
 #define FF(a, b, c, d, x, s, ac) { \
  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define GG(a, b, c, d, x, s, ac) { \
  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define HH(a, b, c, d, x, s, ac) { \
  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 #define II(a, b, c, d, x, s, ac) { \
  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
  }
 //#pragma endregion
 typedef unsigned char BYTE ;
 // POINTER defines a generic pointer type
 typedef unsigned char *POINTER;
 // UINT2 defines a two byte word
 typedef unsigned short int UINT2;
 // UINT4 defines a four byte word
 typedef unsigned int UINT4;
 static unsigned char PADDING[64] = {
    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 // convenient object that wraps
 // the C-functions for use in C++ only
 class MD5 {
 private:
    struct __context_t {
        UINT4 state[4];                                   /* state (ABCD) */
        UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
        unsigned char buffer[64];                         /* input buffer */
    } context ;
    //#pragma region static helper functions
    // The core of the MD5 algorithm is here.
    // MD5 basic transformation. Transforms state based on block.
    //
    // state: the four 32-bit chaining words; each one is incremented by the
    //        corresponding working variable after the 64 steps below.
    // block: 64 input bytes; Decode() unpacks them (low byte first) into the
    //        sixteen 32-bit words x[0..15] consumed by the rounds.
    static void MD5Transform(UINT4 state[4], unsigned char block[64]) {
        UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
        Decode(x, block, 64);
        /* Round 1 */
        FF(a, b, c, d, x[ 0], S11, 0xd76aa478);  /* 1 */
        FF(d, a, b, c, x[ 1], S12, 0xe8c7b756);  /* 2 */
        FF(c, d, a, b, x[ 2], S13, 0x242070db);  /* 3 */
        FF(b, c, d, a, x[ 3], S14, 0xc1bdceee);  /* 4 */
        FF(a, b, c, d, x[ 4], S11, 0xf57c0faf);  /* 5 */
        FF(d, a, b, c, x[ 5], S12, 0x4787c62a);  /* 6 */
        FF(c, d, a, b, x[ 6], S13, 0xa8304613);  /* 7 */
        FF(b, c, d, a, x[ 7], S14, 0xfd469501);  /* 8 */
        FF(a, b, c, d, x[ 8], S11, 0x698098d8);  /* 9 */
        FF(d, a, b, c, x[ 9], S12, 0x8b44f7af);  /* 10 */
        FF(c, d, a, b, x[10], S13, 0xffff5bb1);  /* 11 */
        FF(b, c, d, a, x[11], S14, 0x895cd7be);  /* 12 */
        FF(a, b, c, d, x[12], S11, 0x6b901122);  /* 13 */
        FF(d, a, b, c, x[13], S12, 0xfd987193);  /* 14 */
        FF(c, d, a, b, x[14], S13, 0xa679438e);  /* 15 */
        FF(b, c, d, a, x[15], S14, 0x49b40821);  /* 16 */
        /* Round 2 */
        GG(a, b, c, d, x[ 1], S21, 0xf61e2562);  /* 17 */
        GG(d, a, b, c, x[ 6], S22, 0xc040b340);  /* 18 */
        GG(c, d, a, b, x[11], S23, 0x265e5a51);  /* 19 */
        GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa);  /* 20 */
        GG(a, b, c, d, x[ 5], S21, 0xd62f105d);  /* 21 */
        GG(d, a, b, c, x[10], S22,  0x2441453);  /* 22 */
        GG(c, d, a, b, x[15], S23, 0xd8a1e681);  /* 23 */
        GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8);  /* 24 */
        GG(a, b, c, d, x[ 9], S21, 0x21e1cde6);  /* 25 */
        GG(d, a, b, c, x[14], S22, 0xc33707d6);  /* 26 */
        GG(c, d, a, b, x[ 3], S23, 0xf4d50d87);  /* 27 */
        GG(b, c, d, a, x[ 8], S24, 0x455a14ed);  /* 28 */
        GG(a, b, c, d, x[13], S21, 0xa9e3e905);  /* 29 */
        GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8);  /* 30 */
        GG(c, d, a, b, x[ 7], S23, 0x676f02d9);  /* 31 */
        GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);  /* 32 */
        /* Round 3 */
        HH(a, b, c, d, x[ 5], S31, 0xfffa3942);  /* 33 */
        HH(d, a, b, c, x[ 8], S32, 0x8771f681);  /* 34 */
        HH(c, d, a, b, x[11], S33, 0x6d9d6122);  /* 35 */
        HH(b, c, d, a, x[14], S34, 0xfde5380c);  /* 36 */
        HH(a, b, c, d, x[ 1], S31, 0xa4beea44);  /* 37 */
        HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9);  /* 38 */
        HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60);  /* 39 */
        HH(b, c, d, a, x[10], S34, 0xbebfbc70);  /* 40 */
        HH(a, b, c, d, x[13], S31, 0x289b7ec6);  /* 41 */
        HH(d, a, b, c, x[ 0], S32, 0xeaa127fa);  /* 42 */
        HH(c, d, a, b, x[ 3], S33, 0xd4ef3085);  /* 43 */
        HH(b, c, d, a, x[ 6], S34,  0x4881d05);  /* 44 */
        HH(a, b, c, d, x[ 9], S31, 0xd9d4d039);  /* 45 */
        HH(d, a, b, c, x[12], S32, 0xe6db99e5);  /* 46 */
        HH(c, d, a, b, x[15], S33, 0x1fa27cf8);  /* 47 */
        HH(b, c, d, a, x[ 2], S34, 0xc4ac5665);  /* 48 */
        /* Round 4 */
        II(a, b, c, d, x[ 0], S41, 0xf4292244);  /* 49 */
        II(d, a, b, c, x[ 7], S42, 0x432aff97);  /* 50 */
        II(c, d, a, b, x[14], S43, 0xab9423a7);  /* 51 */
        II(b, c, d, a, x[ 5], S44, 0xfc93a039);  /* 52 */
        II(a, b, c, d, x[12], S41, 0x655b59c3);  /* 53 */
        II(d, a, b, c, x[ 3], S42, 0x8f0ccc92);  /* 54 */
        II(c, d, a, b, x[10], S43, 0xffeff47d);  /* 55 */
        II(b, c, d, a, x[ 1], S44, 0x85845dd1);  /* 56 */
        II(a, b, c, d, x[ 8], S41, 0x6fa87e4f);  /* 57 */
        II(d, a, b, c, x[15], S42, 0xfe2ce6e0);  /* 58 */
        II(c, d, a, b, x[ 6], S43, 0xa3014314);  /* 59 */
        II(b, c, d, a, x[13], S44, 0x4e0811a1);  /* 60 */
        II(a, b, c, d, x[ 4], S41, 0xf7537e82);  /* 61 */
        II(d, a, b, c, x[11], S42, 0xbd3af235);  /* 62 */
        II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb);  /* 63 */
        II(b, c, d, a, x[ 9], S44, 0xeb86d391);  /* 64 */
        // Fold the working variables back into the chaining state.
        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;
        // Zeroize sensitive information.
        // NOTE(review): x is not read after this memset, so an optimizer may
        // be free to drop the call -- confirm if the wipe is actually required.
        memset((POINTER)x, 0, sizeof(x));
    }
    // Encodes input (UINT4) into output (unsigned char). Assumes len is
    // a multiple of 4.
    static void Encode(unsigned char *output, UINT4 *input, unsigned int len) {
        unsigned int i, j;
        for(i = 0, j = 0; j < len; i++, j += 4) {
            output[j] = (unsigned char)(input[i] & 0xff);
            output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
            output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
            output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
        }
    }
    // Packs bytes into 32-bit words, treating the first byte of each group
    // of four as the least-significant one. len is the number of input
    // bytes and is assumed to be a multiple of 4.
    static void Decode(UINT4 *output, unsigned char *input, unsigned int len) {
        unsigned int word = 0;
        unsigned int offset = 0;
        while(offset < len) {
            const UINT4 b0 = (UINT4)input[offset];
            const UINT4 b1 = ((UINT4)input[offset + 1]) << 8;
            const UINT4 b2 = ((UINT4)input[offset + 2]) << 16;
            const UINT4 b3 = ((UINT4)input[offset + 3]) << 24;
            output[word] = b0 | b1 | b2 | b3;
            ++word;
            offset += 4;
        }
    }
    //#pragma endregion
 public:
    // MAIN FUNCTIONS
    // A freshly constructed object is ready to accept Update() calls.
    MD5() {
        Init();
    }
    // Starts a new digest: clears the 64-bit bit counter and loads the
    // four initial chaining values into the context.
    void Init() {
        context.count[0] = 0;
        context.count[1] = 0;
        // Magic initialization constants.
        context.state[0] = 0x67452301;
        context.state[1] = 0xefcdab89;
        context.state[2] = 0x98badcfe;
        context.state[3] = 0x10325476;
    }
    // MD5 block update operation. Continues an MD5 message-digest
    // operation, processing another message block, and updating the
    // context.
    //
    // Bytes that do not complete a 64-byte block are kept in context.buffer
    // until a later call (or Final()) supplies the rest.
    void Update(
        unsigned char *input,   // input block
        unsigned int inputLen) {  // length of input block
        unsigned int i, index, partLen;
        // Compute number of bytes mod 64
        // (count[] holds a bit count, hence the shift by 3 before masking)
        index = (unsigned int)((context.count[0] >> 3) & 0x3F);
        // Update number of bits
        // count[0] is the low word; if the sum wrapped around it is now smaller
        // than the addend, so carry one into the high word count[1].
        if((context.count[0] += ((UINT4)inputLen << 3))
                < ((UINT4)inputLen << 3))
            context.count[1]++;
        // The top three bits of inputLen were shifted out above; add them to
        // the high word.
        context.count[1] += ((UINT4)inputLen >> 29);
        // Number of bytes still missing to fill the internal buffer.
        partLen = 64 - index;
        // Transform as many times as possible.
        if(inputLen >= partLen) {
            // Complete the partially filled buffer and process it ...
            memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
            MD5Transform(context.state, context.buffer);
            // ... then process every further full 64-byte block straight
            // from the caller's memory without copying.
            for(i = partLen; i + 63 < inputLen; i += 64)
                MD5Transform(context.state, &input[i]);
            index = 0;
        } else
            i = 0;
        /* Buffer remaining input */
        memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen - i);
    }
    // MD5 finalization. Ends an MD5 message-digest operation, writing
    // the message digest and zeroizing the context.
    // Writes to digestRaw and, via writeToString(), to digestChars.
    // The context is wiped at the end, so Init() has to be called before
    // this object is fed with new data.
    void Final() {
        unsigned char bits[8];
        unsigned int index, padLen;
        // Save number of bits
        // (must be captured now: the padding Update() below changes the count)
        Encode(bits, context.count, 8);
        // Pad out to 56 mod 64.
        index = (unsigned int)((context.count[0] >> 3) & 0x3f);
        // If fewer than 8 bytes would remain in this block, the padding
        // runs on into the next block (120 = 64 + 56).
        padLen = (index < 56) ? (56 - index) : (120 - index);
        Update(PADDING, padLen);
        // Append length (before padding)
        Update(bits, 8);
        // Store state in digest
        Encode(digestRaw, context.state, 16);
        // Zeroize sensitive information.
        memset((POINTER)&context, 0, sizeof(context));
        writeToString() ;
    }
    /// Renders the 16 bytes of digestRaw as 32 lowercase hex digits into
    /// digestChars and NUL-terminates the result (33 chars in total).
    /// Uses a lookup table instead of sprintf, so no unbounded formatted
    /// write is involved and the terminator position is explicit.
    void writeToString() {
        static const char kHexDigits[] = "0123456789abcdef";
        for(int pos = 0; pos < 16; pos++) {
            digestChars[pos * 2]     = kHexDigits[(digestRaw[pos] >> 4) & 0x0f];
            digestChars[pos * 2 + 1] = kHexDigits[digestRaw[pos] & 0x0f];
        }
        digestChars[32] = '\0';
    }
 public:
    // an MD5 digest is a 16-byte number (32 hex digits)
    BYTE digestRaw[ 16 ] ;
    // This version of the digest is actually
    // a "printf'd" version of the digest.
    char digestChars[ 33 ] ;
    /// Digests the contents of a file.
    ///
    /// filename: path of the file to read; NULL or "" is rejected.
    /// Returns digestChars (32 hex digits, NUL-terminated, owned by this
    /// object) on success, or NULL if the name is empty, the file cannot
    /// be opened, or reading it fails part-way.
    const char* digestFile(const char *filename) {
        if(NULL == filename || filename[0] == '\0')
            return NULL;
        Init() ;
        FILE *file = fopen(filename, "rb");
        if(NULL == file) {
            return NULL;
        }
        unsigned char buffer[1024] ;
        // fread returns size_t; keep it in that type and narrow only at the
        // Update() call, where the value is known to be <= sizeof(buffer).
        size_t len;
        while((len = fread(buffer, 1, sizeof(buffer), file)) > 0)
            Update(buffer, static_cast<unsigned int>(len)) ;
        // A short read can mean EOF or an I/O error; only EOF yields a
        // digest that describes the whole file.
        const bool readFailed = (ferror(file) != 0);
        fclose(file);
        if(readFailed) {
            return NULL;
        }
        Final();
        return digestChars ;
    }
    /// Digests a byte array that is already in memory.
    ///
    /// memchunk: first byte of the data; NULL is rejected.
    /// len:      number of bytes to digest; a negative value is rejected
    ///           (it would otherwise be converted to a huge unsigned length).
    /// Returns digestChars on success, NULL on invalid arguments.
    const char* digestMemory(BYTE *memchunk, int len) {
        if(NULL == memchunk || len < 0)
            return NULL;
        Init() ;
        Update(memchunk, static_cast<unsigned int>(len)) ;
        Final() ;
        return digestChars ;
    }
    // Digests a NUL-terminated string (the terminator itself is not
    // hashed). Returns digestChars, or NULL when given a NULL pointer.
    const char* digestString(const char *string) {
        if(NULL == string) {
            return NULL;
        }
        // Update() takes a non-const byte pointer.
        unsigned char *bytes = (unsigned char*)string;
        Init();
        Update(bytes, strlen(string));
        Final();
        return digestChars;
    }
 };
 inline bool md5String(const char* str, std::string& res) {
    if(NULL == str) {
        res = "";
        return false;
    }
    MD5 md5;
    const char *pRes = md5.digestString(str);
    if(NULL == pRes) {
        res = "";
        return false;
    }
    res = pRes;
    return true;
 }
 inline bool md5File(const char* filepath, std::string& res) {
    if(NULL == filepath || strcmp(filepath, "") == 0) {
        res = "";
        return false;
    }
    MD5 md5;
    const char *pRes = md5.digestFile(filepath);
    if(NULL == pRes) {
        res = "";
        return false;
    }
    res = pRes;
    return true;
 }
 }
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
@ -1,51 +0,0 @@
 #ifndef LIMONP_MUTEX_LOCK_HPP
 #define LIMONP_MUTEX_LOCK_HPP
 #include <pthread.h>
 #include "NonCopyable.hpp"
 #include "Logging.hpp"
 namespace limonp {
 // Thin wrapper around a pthread mutex. The mutex is initialised in the
 // constructor and destroyed in the destructor. Every pthread call is
 // wrapped in XCHECK, so a non-zero return code is logged at FATAL level.
 // Lock()/Unlock() are private: locking is only possible through the
 // friend class MutexLockGuard.
 class MutexLock: NonCopyable {
 public:
  // Initialises the mutex with default attributes (NULL attr).
  MutexLock() {
    XCHECK(!pthread_mutex_init(&mutex_, NULL));
  }
  ~MutexLock() {
    XCHECK(!pthread_mutex_destroy(&mutex_));
  }
  // Exposes the raw mutex, e.g. for pthread APIs that need it directly.
  // NOTE(review): presumably used by the condition-variable wrapper -- confirm.
  pthread_mutex_t* GetPthreadMutex() {
    return &mutex_;
  }
 private:
  void Lock() {
    XCHECK(!pthread_mutex_lock(&mutex_));
  }
  void Unlock() {
    XCHECK(!pthread_mutex_unlock(&mutex_));
  }
  friend class MutexLockGuard;
  pthread_mutex_t mutex_;
 }; // class MutexLock
 // Scoped lock: acquires the given MutexLock on construction and releases
 // it on destruction. Holds only a reference, so the MutexLock must
 // outlive the guard.
 class MutexLockGuard: NonCopyable {
 public:
  explicit MutexLockGuard(MutexLock & mutex)
    : mutex_(mutex) {
    mutex_.Lock();
  }
  ~MutexLockGuard() {
    mutex_.Unlock();
  }
 private:
  MutexLock & mutex_;
 }; // class MutexLockGuard
 // Function-like macro with the same name as the class: an expression of
 // the form `MutexLockGuard(m)` (no variable name) expands to XCHECK(false)
 // instead of creating an unnamed guard. A named declaration such as
 // `MutexLockGuard guard(m);` is not affected because the macro only
 // matches when the name is directly followed by '('.
 #define MutexLockGuard(x) XCHECK(false);
 } // namespace limonp
 #endif // LIMONP_MUTEX_LOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
@ -1,21 +0,0 @@
 /************************************
 ************************************/
 #ifndef LIMONP_NONCOPYABLE_H
 #define LIMONP_NONCOPYABLE_H
 namespace limonp {
 // Base class for types that must not be copied. The copy constructor and
 // copy assignment operator are private and only declared in this header,
 // so copying a derived object is rejected by the compiler. Construction
 // and destruction are protected: the class is usable only as a base.
 class NonCopyable {
  private:
   NonCopyable(const NonCopyable& other);
   const NonCopyable& operator=(const NonCopyable& other);
  protected:
   NonCopyable() {}
   ~NonCopyable() {}
 }; // class NonCopyable
 } // namespace limonp
 #endif // LIMONP_NONCOPYABLE_H
--- a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
@ -1,157 +0,0 @@
 #ifndef LIMONP_STD_EXTEMSION_HPP
 #define LIMONP_STD_EXTEMSION_HPP
 #include <map>
 #ifdef __APPLE__
 #include <unordered_map>
 #include <unordered_set>
 #elif(__cplusplus >= 201103L)
 #include <unordered_map>
 #include <unordered_set>
 #elif defined _MSC_VER
 #include <unordered_map>
 #include <unordered_set>
 #else
 #include <tr1/unordered_map>
 #include <tr1/unordered_set>
 namespace std {
 using std::tr1::unordered_map;
 using std::tr1::unordered_set;
 }
 #endif
 #include <set>
 #include <string>
 #include <vector>
 #include <deque>
 #include <fstream>
 #include <sstream>
 namespace std {
 // Prints a vector as "[a, b, c]"; an empty vector prints as "[]".
 template<typename T>
 ostream& operator << (ostream& os, const vector<T>& v) {
  if(v.empty()) {
    os << "[]";
    return os;
  }
  typename vector<T>::const_iterator cur = v.begin();
  os << "[" << *cur;
  for(++cur; cur != v.end(); ++cur) {
    os << ", " << *cur;
  }
  return os << "]";
 }
 // Specialisation for strings: every element is wrapped in double
 // quotes, e.g. ["a", "b"]; an empty vector still prints as "[]".
 template<>
 inline ostream& operator << (ostream& os, const vector<string>& v) {
  if(v.empty()) {
    os << "[]";
    return os;
  }
  vector<string>::const_iterator cur = v.begin();
  os << "[\"" << *cur;
  for(++cur; cur != v.end(); ++cur) {
    os << "\", \"" << *cur;
  }
  return os << "\"]";
 }
 // Prints a deque as ["a", "b"]: every element is surrounded by double
 // quotes regardless of its type. An empty deque prints as "[]".
 template<typename T>
 ostream& operator << (ostream& os, const deque<T>& dq) {
  if(dq.empty()) {
    os << "[]";
    return os;
  }
  typename deque<T>::const_iterator item = dq.begin();
  os << "[\"" << *item;
  for(++item; item != dq.end(); ++item) {
    os << "\", \"" << *item;
  }
  return os << "\"]";
 }
 // Prints a pair as "first:second".
 template<class T1, class T2>
 ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
  return os << pr.first << ":" << pr.second;
 }
 // Replaces (does not append to) the contents of str with the stream
 // representation of obj, and returns str.
 template<class T>
 string& operator << (string& str, const T& obj) {
  stringstream formatter;
  formatter << obj; // uses the ostream operator << of T
  str = formatter.str();
  return str;
 }
 // Prints a map as "{k1:v1, k2:v2}" in iteration order; "{}" when empty.
 // Each entry is written through the pair operator <<.
 template<class T1, class T2>
 ostream& operator << (ostream& os, const map<T1, T2>& mp) {
  if(mp.empty()) {
    return os << "{}";
  }
  typename map<T1, T2>::const_iterator entry = mp.begin();
  os << '{';
  os << *entry;
  for(++entry; entry != mp.end(); ++entry) {
    os << ", " << *entry;
  }
  return os << '}';
 }
 // Prints an unordered_map as "{k1:v1, k2:v2}" in the container's
 // iteration order; "{}" when empty. Each entry is written through the
 // pair operator <<.
 template<class T1, class T2>
 ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
  if(mp.empty()) {
    os << "{}";
    return os;
  }
  typename std::unordered_map<T1, T2>::const_iterator entry = mp.begin();
  os << '{';
  os << *entry;
  for(++entry; entry != mp.end(); ++entry) {
    os << ", " << *entry;
  }
  os << '}';
  return os;
 }
 // Prints a set as "{a, b, c}" in iteration order; "{}" when empty.
 template<class T>
 ostream& operator << (ostream& os, const set<T>& st) {
  if(st.empty()) {
    return os << "{}";
  }
  typename set<T>::const_iterator elem = st.begin();
  os << '{';
  os << *elem;
  for(++elem; elem != st.end(); ++elem) {
    os << ", " << *elem;
  }
  return os << '}';
 }
 // Returns true when contain.find(key) locates an element. Works with
 // any container that offers find() and end().
 template<class KeyType, class ContainType>
 bool IsIn(const ContainType& contain, const KeyType& key) {
  const bool missing = (contain.find(key) == contain.end());
  return !missing;
 }
 // Replaces the contents of s with everything that can still be read
 // from ifs, and returns s.
 template<class T>
 basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
  istreambuf_iterator<T> first(ifs);
  istreambuf_iterator<T> last;
  s.assign(first, last);
  return s;
 }
 // Writes the characters of s to ofs through its stream buffer
 // (unformatted), and returns ofs.
 template<class T>
 ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
  copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
  return ofs;
 }
 } // namespace std
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
@ -1,382 +0,0 @@
 /************************************
 * file enc : ascii
 * author   : wuyanyi09@gmail.com
 ************************************/
 #ifndef LIMONP_STR_FUNCTS_H
 #define LIMONP_STR_FUNCTS_H
 #include <stdint.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <memory.h>
 #include <sys/types.h>
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <algorithm>
 #include <cctype>
 #include <map>
 #include <functional>
 #include <locale>
 #include <sstream>
 #include <iterator>
 #include <algorithm>
 #include "StdExtension.hpp"
 namespace limonp {
 using namespace std;
 // printf-style formatting into a std::string.
 //
 // fmt: printf format string, followed by the matching arguments.
 // Returns the formatted text. Formatting starts with a 256-byte buffer
 // and is retried with a larger one until the result fits: with the
 // exact size when vsnprintf reports it, otherwise with double the size.
 inline string StringFormat(const char* fmt, ...) {
  int size = 256;
  std::string str;
  for (;;) {
    str.resize(size);
    va_list ap;
    va_start(ap, fmt);
    // Write through &str[0]: the pointer returned by c_str() is const
    // and must not be written to.
    const int n = vsnprintf(&str[0], size, fmt, ap);
    va_end(ap);
    if (n > -1 && n < size) {
      str.resize(n);
      return str;
    }
    // n >= size: n is the length needed (without the terminator).
    // n < 0: no length reported, so just grow and try again.
    size = (n > -1) ? n + 1 : size * 2;
  }
 }
 // Streams the elements of [begin, end) into res, separated by
 // connector. For an empty range res is left untouched.
 template<class T>
 void Join(T begin, T end, string& res, const string& connector) {
  if(begin == end) {
    return;
  }
  stringstream joined;
  joined << *begin;
  for(++begin; begin != end; ++begin) {
    joined << connector << *begin;
  }
  res = joined.str();
 }
 // Convenience overload that returns the joined text; an empty range
 // yields an empty string.
 template<class T>
 string Join(T begin, T end, const string& connector) {
  string joined;
  Join(begin, end, joined, connector);
  return joined;
 }
 // Upper-cases str in place, byte by byte, and returns it.
 // Each byte is converted to unsigned char before it is handed to
 // toupper: passing a negative char value (any byte >= 0x80 where char
 // is signed, e.g. UTF-8 text) to toupper is undefined behaviour.
 inline string& Upper(string& str) {
  for(string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::toupper(byte));
  }
  return str;
 }
 // Lower-cases str in place, byte by byte, and returns it.
 // Each byte is converted to unsigned char before it is handed to
 // tolower: passing a negative char value (any byte >= 0x80 where char
 // is signed, e.g. UTF-8 text) to tolower is undefined behaviour.
 inline string& Lower(string& str) {
  for(string::size_type i = 0; i < str.size(); ++i) {
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    str[i] = static_cast<char>(std::tolower(byte));
  }
  return str;
 }
 // Whitespace test that is safe for any unsigned value: anything above
 // 0xff is reported as non-space instead of being passed to isspace.
 inline bool IsSpace(unsigned c) {
  // when passing large int as the argument of isspace, it core dump, so here need a type cast.
  return c > 0xff ? false : std::isspace(c & 0xff);
 }
 // Removes leading whitespace (as defined by IsSpace) from s in place
 // and returns s. Written as a plain loop because the function adaptors
 // std::ptr_fun / std::not1 are no longer part of the standard library
 // in C++17 and later.
 inline std::string& LTrim(std::string &s) {
  std::string::size_type first = 0;
  // s[first] is converted to unsigned exactly as the IsSpace parameter
  // requires; no extra cast, to keep the classification unchanged.
  while(first < s.size() && IsSpace(s[first])) {
    ++first;
  }
  s.erase(0, first);
  return s;
 }
 // Removes trailing whitespace (as defined by IsSpace) from s in place
 // and returns s.
 inline std::string& RTrim(std::string &s) {
  std::string::size_type last = s.size();
  while(last > 0 && IsSpace(s[last - 1])) {
    --last;
  }
  s.erase(last);
  return s;
 }
 // Removes whitespace from both ends of s in place and returns s.
 inline std::string& Trim(std::string &s) {
  return LTrim(RTrim(s));
 }
 // Removes every leading occurrence of x from s in place and returns s.
 // Uses std::string's own search instead of std::bind2nd / std::not1,
 // which are no longer part of the standard library in C++17 and later.
 inline std::string& LTrim(std::string & s, char x) {
  // npos (no other character found) erases the whole string.
  s.erase(0, s.find_first_not_of(x));
  return s;
 }
 // Removes every trailing occurrence of x from s in place and returns s.
 inline std::string& RTrim(std::string & s, char x) {
  const std::string::size_type last = s.find_last_not_of(x);
  if(last == std::string::npos) {
    s.clear(); // s is empty or consists of x only
  } else {
    s.erase(last + 1);
  }
  return s;
 }
 // Removes x from both ends of s in place and returns s.
 inline std::string& Trim(std::string &s, char x) {
  return LTrim(RTrim(s, x), x);
 }
 // Splits src at any character contained in pattern and stores the
 // pieces in res (which is cleared first).
 //  - Empty pieces between adjacent separators are kept.
 //  - Nothing is emitted for an empty src or after a trailing separator.
 //  - Once maxsplit pieces have been produced, the rest of src is
 //    appended unsplit as the final piece.
 inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
  res.clear();
  size_t cursor = 0;
  while(cursor < src.size()) {
    const size_t hit = src.find_first_of(pattern, cursor);
    const bool lastPiece = (hit == string::npos) || (res.size() >= maxsplit);
    if(lastPiece) {
      res.push_back(src.substr(cursor));
      break;
    }
    res.push_back(src.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
 }
 // Convenience overload that returns the pieces by value.
 inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
  vector<string> pieces;
  Split(src, pieces, pattern, maxsplit);
  return pieces;
 }
 // Returns true when str begins with prefix (an empty prefix always
 // matches).
 inline bool StartsWith(const string& str, const string& prefix) {
  const string::size_type n = prefix.length();
  return n <= str.length() && str.compare(0, n, prefix) == 0;
 }
 // Returns true when str ends with suffix (an empty suffix always
 // matches).
 inline bool EndsWith(const string& str, const string& suffix) {
  const string::size_type n = suffix.length();
  if(n > str.length()) {
    return false;
  }
  const string::size_type tailStart = str.length() - n;
  return str.compare(tailStart, n, suffix) == 0;
 }
 // Returns true when ch occurs anywhere in str.
 inline bool IsInStr(const string& str, char ch) {
  const string::size_type where = str.find(ch);
  return string::npos != where;
 }
 // Combines two bytes into one 16-bit value: high becomes the upper
 // eight bits, low the lower eight. Only the low eight bits of each
 // argument are used, so negative char values are handled.
 inline uint16_t TwocharToUint16(char high, char low) {
  const unsigned upper = static_cast<unsigned char>(high);
  const unsigned lower = static_cast<unsigned char>(low);
  return static_cast<uint16_t>((upper << 8) | lower);
 }
 // Decodes UTF-8 text into 16-bit code units appended to vec (vec is
 // cleared first).
 //
 // str/len: input bytes; a NULL str is rejected.
 // vec:     receives one uint16_t per decoded 1-, 2- or 3-byte sequence.
 // Returns false for a NULL input, for a sequence that is cut off by the
 // end of the buffer, and for any lead byte above 0xef (4-byte sequences
 // do not fit into 16 bits). On a false return vec holds whatever was
 // decoded before the offending byte.
 //
 // NOTE(review): the branch tests only compare the lead byte against an
 // upper bound, so bytes 0x80..0xbf take the two-byte branch, and the
 // trailing bytes are masked but never checked for the 10xxxxxx form --
 // malformed input is therefore decoded rather than rejected.
 template <class Uint16Container>
 bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
  if(!str) {
    return false;
  }
  char ch1, ch2;
  uint16_t tmp;
  vec.clear();
  for(size_t i = 0; i < len;) {
    if(!(str[i] & 0x80)) { // 0xxxxxxx: single byte, stored as is
      vec.push_back(str[i]);
      i++;
    } else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
      // ch1 = upper 3 of the 11 payload bits, ch2 = lower 8
      ch1 = (str[i] >> 2) & 0x07;
      ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
      tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
      vec.push_back(tmp);
      i += 2;
    } else if((uint8_t)str[i] <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
      // ch1 = upper 8 of the 16 payload bits, ch2 = lower 8
      ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
      ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
      tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
      vec.push_back(tmp);
      i += 3;
    } else {
      return false;
    }
  }
  return true;
 }
 // Convenience overload for std::string input.
 template <class Uint16Container>
 bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
  return Utf8ToUnicode(str.c_str(), str.size(), vec);
 }
 // Decodes UTF-8 text into 32-bit code points appended to vec (vec is
 // cleared first). Handles sequences of one to four bytes.
 //
 // Returns false when a sequence is cut off by the end of the buffer or
 // when the lead byte is above 0xf7; in that case vec holds the code
 // points decoded so far. Trailing bytes are masked to their low six
 // bits without further validation.
 template <class Uint32Container>
 bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
  vec.clear();
  size_t i = 0;
  while(i < size) {
    const uint8_t lead = (uint8_t)(str[i]);
    size_t trailing;   // number of bytes following the lead byte
    uint32_t tmp;      // payload bits collected so far
    if(!(lead & 0x80)) {                        // 0xxxxxxx: 7 bits
      trailing = 0;
      tmp = lead & 0x7f;
    } else if(lead <= 0xdf && i + 1 < size) {   // 110xxxxx: 5 + 6 bits
      trailing = 1;
      tmp = lead & 0x1f;
    } else if(lead <= 0xef && i + 2 < size) {   // 1110xxxx: 4 + 6 + 6 bits
      trailing = 2;
      tmp = lead & 0x0f;
    } else if(lead <= 0xf7 && i + 3 < size) {   // 11110xxx: 3 + 6 + 6 + 6 bits
      trailing = 3;
      tmp = lead & 0x07;
    } else {
      return false;
    }
    // Every trailing byte contributes its low six bits.
    for(size_t k = 1; k <= trailing; ++k) {
      tmp <<= 6;
      tmp |= (uint8_t)(str[i + k]) & 0x3f;
    }
    i += trailing + 1;
    vec.push_back(tmp);
  }
  return true;
 }
 // Convenience overload for std::string input.
 template <class Uint32Container>
 bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
  const char * bytes = str.data();
  return Utf8ToUnicode32(bytes, str.size(), vec);
 }
 // Returns how many bytes the UTF-8 encoding of code point ui occupies:
 // 1 up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, otherwise 4.
 inline int UnicodeToUtf8Bytes(uint32_t ui){
    if(ui > 0xffff) {
        return 4;
    }
    if(ui > 0x7ff) {
        return 3;
    }
    return (ui > 0x7f) ? 2 : 1;
 }
 // Encodes the 32-bit code points in [begin, end) as UTF-8 into res
 // (res is cleared first).
 //
 // Code points up to 0x7f / 0x7ff / 0xffff produce 1 / 2 / 3 bytes, all
 // larger values produce 4 bytes. The lead byte of a 4-byte sequence
 // carries three payload bits (11110xxx), matching the 0x07 mask used by
 // Utf8ToUnicode32 when decoding; with a two-bit mask the code points
 // U+100000..U+10FFFF would lose their top bit.
 template <class Uint32ContainerConIter>
 void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint32_t ui = *begin;
    if(ui <= 0x7f) {
      res += char(ui);
    } else if(ui <= 0x7ff) {
      res += char(((ui >> 6) & 0x1f) | 0xc0);
      res += char((ui & 0x3f) | 0x80);
    } else if(ui <= 0xffff) {
      res += char(((ui >> 12) & 0x0f) | 0xe0);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    } else {
      res += char(((ui >> 18) & 0x07) | 0xf0);
      res += char(((ui >> 12) & 0x3f) | 0x80);
      res += char(((ui >> 6) & 0x3f) | 0x80);
      res += char((ui & 0x3f) | 0x80);
    }
  }
 }
 // Encodes the 16-bit code units in [begin, end) as UTF-8 into res
 // (res is cleared first). Values up to 0x7f / 0x7ff produce 1 / 2
 // bytes, everything else 3 bytes.
 template <class Uint16ContainerConIter>
 void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const uint16_t unit = *begin;
    if(unit <= 0x7f) {
      res.push_back(char(unit));
      continue;
    }
    if(unit <= 0x7ff) {
      res.push_back(char(((unit >> 6) & 0x1f) | 0xc0));
    } else {
      res.push_back(char(((unit >> 12) & 0x0f) | 0xe0));
      res.push_back(char(((unit >> 6) & 0x3f) | 0x80));
    }
    // Both multi-byte forms end with the low six bits.
    res.push_back(char((unit & 0x3f) | 0x80));
  }
 }
 // Converts double-byte (GBK-style) text into 16-bit units stored in vec
 // (vec is cleared first). A byte with the high bit clear becomes one
 // unit on its own; a byte with the high bit set is combined with the
 // following byte (first byte in the upper eight bits).
 //
 // Returns true on success -- including for a NULL str, which yields an
 // empty vec -- and false when a high-bit byte is the last byte of the
 // input.
 template <class Uint16Container>
 bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
  vec.clear();
  if(!str) {
    return true;
  }
  for(size_t i = 0; i < len; ) {
    const bool singleByte = (0 == (str[i] & 0x80));
    if(singleByte) {
      vec.push_back(uint16_t(str[i]));
      i += 1;
      continue;
    }
    if(i + 1 >= len) {
      return false; // lead byte without a trail byte
    }
    const uint16_t lead = uint16_t(str[i]) & 0x00ff;
    const uint16_t trail = uint16_t(str[i+1]) & 0x00ff;
    const uint16_t tmp = ((lead << 8) | trail);
    vec.push_back(tmp);
    i += 2;
  }
  return true;
 }
 // Convenience overload for std::string input.
 template <class Uint16Container>
 bool GBKTrans(const string& str, Uint16Container& vec) {
  const char* const bytes = str.c_str();
  return GBKTrans(bytes, str.size(), vec);
 }
 // Converts 16-bit units back into double-byte text stored in res (res
 // is cleared first). A unit whose upper byte has its high bit set is
 // written as two bytes (upper, then lower); any other unit is written
 // as its lower byte only.
 template <class Uint16ContainerConIter>
 void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
  res.clear();
  for(; begin != end; ++begin) {
    const char upper = ((*begin)>>8) & 0x00ff;
    const char lower = (*begin) & 0x00ff;
    if(upper & 0x80) {
      res.push_back(upper);
    }
    res.push_back(lower);
  }
 }
 /*
 * format example: "%Y-%m-%d %H:%M:%S"
 */
 // inline void GetTime(const string& format, string&  timeStr) {
 //   time_t timeNow;
 //   time(&timeNow);
 //   timeStr.resize(64);
 //   size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
 //   timeStr.resize(len);
 // }
 // Concatenates two path components, inserting a single "/" between
 // them unless path1 already ends with one.
 inline string PathJoin(const string& path1, const string& path2) {
  const bool hasSlash = EndsWith(path1, "/");
  if(!hasSlash) {
    return path1 + "/" + path2;
  }
  return path1 + path2;
 }
 }
 #endif
--- a/libchinese-segmentation/cppjieba/limonp/Thread.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Thread.hpp
@ -1,44 +0,0 @@
 #ifndef LIMONP_THREAD_HPP
 #define LIMONP_THREAD_HPP
 #include "Logging.hpp"
 #include "NonCopyable.hpp"
 namespace limonp {
 // Base class for an object that runs its Run() method on its own
 // pthread. Derive, implement Run(), then call Start() and later Join().
 //
 // If the object is destroyed after Start() but without Join(), the
 // thread is detached rather than joined.
 // NOTE(review): a detached thread may still be executing Run() on this
 // object while it is being destroyed -- callers should Join() first.
 class IThread: NonCopyable {
  public:
   // thread_ is value-initialised so it never holds an indeterminate value.
   IThread(): thread_(), isStarted(false), isJoined(false) {
   }
   virtual ~IThread() {
     if(isStarted && !isJoined) {
       XCHECK(!pthread_detach(thread_));
     }
   }
   // Body of the thread; executed once on the new thread after Start().
   virtual void Run() = 0;
   // Creates the thread. Must not be called more than once.
   void Start() {
     XCHECK(!isStarted);
     XCHECK(!pthread_create(&thread_, NULL, Worker, this));
     isStarted = true;
   }
   // Waits for the thread to finish. Must not be called more than once
   // for a started thread. Does nothing if Start() was never called:
   // there is no thread to wait for, and thread_ does not identify one.
   void Join() {
     if(!isStarted) {
       return;
     }
     XCHECK(!isJoined);
     XCHECK(!pthread_join(thread_, NULL));
     isJoined = true;
   }
  private:
   // pthread entry point: forwards to Run() of the IThread passed as data.
   static void * Worker(void * data) {
     IThread * ptr = (IThread* ) data;
     ptr->Run();
     return NULL;
   }
   pthread_t thread_;
   bool isStarted;
   bool isJoined;
 }; // class IThread
 } // namespace limonp
 #endif // LIMONP_THREAD_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
@ -1,86 +0,0 @@
 #ifndef LIMONP_THREAD_POOL_HPP
 #define LIMONP_THREAD_POOL_HPP
 #include "Thread.hpp"
 #include "BlockingQueue.hpp"
 #include "BoundedBlockingQueue.hpp"
 #include "Closure.hpp"
 namespace limonp {
 using namespace std;
 //class ThreadPool;
 // Fixed-size pool of worker threads that execute queued closures.
 //
 // Usage: construct with the number of threads, call Start(), hand over
 // tasks with Add(), and call Stop() (or let the destructor do it) to
 // shut down. Each task is deleted by the worker after it has run.
 class ThreadPool: NonCopyable {
  public:
   // Thread body of one pool member: pops closures from the pool's queue
   // and runs them until it receives a NULL entry.
   class Worker: public IThread {
    public:
     Worker(ThreadPool* pool): ptThreadPool_(pool) {
       assert(ptThreadPool_);
     }
     virtual ~Worker() {
     }
     virtual void Run() {
       while (true) {
         ClosureInterface* closure = ptThreadPool_->queue_.Pop();
         if (closure == NULL) {
           // NULL is the shutdown marker pushed by ThreadPool::Stop().
           break;
         }
         // Exceptions thrown by a task are logged and must not escape
         // the thread; the worker carries on with the next task.
         try {
           closure->Run();
         } catch(const std::exception& e) {
           XLOG(ERROR) << e.what();
         } catch(...) {
           XLOG(ERROR) << " unknown exception.";
         }
         delete closure;
       }
     }
    private:
     ThreadPool * ptThreadPool_;
   }; // class Worker
   // Creates thread_num workers (not yet running) and a task queue
   // constructed with the same number. thread_num must be non-zero.
   ThreadPool(size_t thread_num)
     : threads_(thread_num),
       queue_(thread_num) {
     assert(thread_num);
     for(size_t i = 0; i < threads_.size(); i ++) {
       threads_[i] = new Worker(this);
     }
   }
   ~ThreadPool() {
     Stop();
   }
   // Starts every worker thread.
   void Start() {
     for(size_t i = 0; i < threads_.size(); i++) {
       threads_[i]->Start();
     }
   }
   // Shuts the pool down: queues one NULL marker per worker, then joins
   // and deletes every worker. Calling it again afterwards is a no-op
   // because the worker list is empty by then.
   void Stop() {
     for(size_t i = 0; i < threads_.size(); i ++) {
       queue_.Push(NULL);
     }
     for(size_t i = 0; i < threads_.size(); i ++) {
       threads_[i]->Join();
       delete threads_[i];
     }
     threads_.clear();
   }
   // Queues a task; ownership passes to the pool (the worker deletes it
   // after running it). task must not be NULL.
   void Add(ClosureInterface* task) {
     assert(task);
     queue_.Push(task);
   }
  private:
   friend class Worker;
   vector<IThread*> threads_;
   BoundedBlockingQueue<ClosureInterface*> queue_;
 }; // class ThreadPool
 } // namespace limonp
 #endif // LIMONP_THREAD_POOL_HPP
--- a/libchinese-segmentation/cppjieba/limonp/limonp.pri
+++ b/libchinese-segmentation/cppjieba/limonp/limonp.pri
@ -1,22 +0,0 @@
 INCLUDEPATH += $$PWD
 HEADERS += \
    $$PWD/ArgvContext.hpp \
    $$PWD/BlockingQueue.hpp \
    $$PWD/BoundedBlockingQueue.hpp \
    $$PWD/BoundedQueue.hpp \
    $$PWD/Closure.hpp \
    $$PWD/Colors.hpp \
    $$PWD/Condition.hpp \
    $$PWD/Config.hpp \
    $$PWD/FileLock.hpp \
    $$PWD/ForcePublic.hpp \
    $$PWD/LocalVector.hpp \
    $$PWD/Logging.hpp \
    $$PWD/Md5.hpp \
    $$PWD/MutexLock.hpp \
    $$PWD/NonCopyable.hpp \
    $$PWD/StdExtension.hpp \
    $$PWD/StringUtil.hpp \
    $$PWD/Thread.hpp \
    $$PWD/ThreadPool.hpp
--- a/libchinese-segmentation/development-files/header-files/ChineseSegmentation
+++ b/libchinese-segmentation/development-files/header-files/ChineseSegmentation
@ -1 +0,0 @@
 #include "chinese-segmentation.h"
--- a/libchinese-segmentation/development-files/header-files/HanZiToPinYin
+++ b/libchinese-segmentation/development-files/header-files/HanZiToPinYin
@ -1 +0,0 @@
 #include "hanzi-to-pinyin.h"
--- a/libchinese-segmentation/dict/README.md
+++ b/libchinese-segmentation/dict/README.md
@ -1,31 +0,0 @@
 # CppJieba字典
 文件后缀名代表的是词典的编码方式。
 比如filename.utf8 是 utf8编码，filename.gbk 是 gbk编码方式。
 ## 分词
 ### jieba.dict.utf8/gbk
 作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
 ### hmm_model.utf8/gbk
 作为隐马尔可夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
 __对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
 ## 关键词抽取
 ### idf.utf8
 IDF(Inverse Document Frequency)
 在KeywordExtractor中，使用的是经典的TF-IDF算法，所以需要这么一个词典提供IDF信息。
 ### stop_words.utf8
 停用词词典
--- a/libchinese-segmentation/dict/hmm_model.utf8
+++ b/libchinese-segmentation/dict/hmm_model.utf8
--- a/libchinese-segmentation/dict/idf.utf8
+++ b/libchinese-segmentation/dict/idf.utf8
--- a/libchinese-segmentation/dict/jieba.dict.utf8
+++ b/libchinese-segmentation/dict/jieba.dict.utf8
--- a/libchinese-segmentation/dict/pinyinWithoutTone.txt
+++ b/libchinese-segmentation/dict/pinyinWithoutTone.txt
--- a/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
+++ b/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_start.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_start.utf8
@ -1,259 +0,0 @@
 #初始状态的概率
 #格式
 #状态:概率
 B,a:-4.7623052146
 B,ad:-6.68006603678
 B,ag:-3.14e+100
 B,an:-8.69708322302
 B,b:-5.01837436211
 B,bg:-3.14e+100
 B,c:-3.42388018495
 B,d:-3.97504752976
 B,df:-8.88897423083
 B,dg:-3.14e+100
 B,e:-8.56355183039
 B,en:-3.14e+100
 B,f:-5.49163041848
 B,g:-3.14e+100
 B,h:-13.53336513
 B,i:-6.11578472756
 B,in:-3.14e+100
 B,j:-5.05761912847
 B,jn:-3.14e+100
 B,k:-3.14e+100
 B,l:-4.90588358466
 B,ln:-3.14e+100
 B,m:-3.6524299819
 B,mg:-3.14e+100
 B,mq:-6.7869530014
 B,n:-1.69662577975
 B,ng:-3.14e+100
 B,nr:-2.23104959138
 B,nrfg:-5.87372217541
 B,nrt:-4.98564273352
 B,ns:-2.8228438315
 B,nt:-4.84609166818
 B,nz:-3.94698846058
 B,o:-8.43349870215
 B,p:-4.20098413209
 B,q:-6.99812385896
 B,qe:-3.14e+100
 B,qg:-3.14e+100
 B,r:-3.40981877908
 B,rg:-3.14e+100
 B,rr:-12.4347528413
 B,rz:-7.94611647157
 B,s:-5.52267359084
 B,t:-3.36474790945
 B,tg:-3.14e+100
 B,u:-9.1639172775
 B,ud:-3.14e+100
 B,ug:-3.14e+100
 B,uj:-3.14e+100
 B,ul:-3.14e+100
 B,uv:-3.14e+100
 B,uz:-3.14e+100
 B,v:-2.67405848743
 B,vd:-9.04472876024
 B,vg:-3.14e+100
 B,vi:-12.4347528413
 B,vn:-4.33156108902
 B,vq:-12.1470707689
 B,w:-3.14e+100
 B,x:-3.14e+100
 B,y:-9.84448567586
 B,yg:-3.14e+100
 B,z:-7.04568111149
 B,zg:-3.14e+100
 E,a:-3.14e+100
 E,ad:-3.14e+100
 E,ag:-3.14e+100
 E,an:-3.14e+100
 E,b:-3.14e+100
 E,bg:-3.14e+100
 E,c:-3.14e+100
 E,d:-3.14e+100
 E,df:-3.14e+100
 E,dg:-3.14e+100
 E,e:-3.14e+100
 E,en:-3.14e+100
 E,f:-3.14e+100
 E,g:-3.14e+100
 E,h:-3.14e+100
 E,i:-3.14e+100
 E,in:-3.14e+100
 E,j:-3.14e+100
 E,jn:-3.14e+100
 E,k:-3.14e+100
 E,l:-3.14e+100
 E,ln:-3.14e+100
 E,m:-3.14e+100
 E,mg:-3.14e+100
 E,mq:-3.14e+100
 E,n:-3.14e+100
 E,ng:-3.14e+100
 E,nr:-3.14e+100
 E,nrfg:-3.14e+100
 E,nrt:-3.14e+100
 E,ns:-3.14e+100
 E,nt:-3.14e+100
 E,nz:-3.14e+100
 E,o:-3.14e+100
 E,p:-3.14e+100
 E,q:-3.14e+100
 E,qe:-3.14e+100
 E,qg:-3.14e+100
 E,r:-3.14e+100
 E,rg:-3.14e+100
 E,rr:-3.14e+100
 E,rz:-3.14e+100
 E,s:-3.14e+100
 E,t:-3.14e+100
 E,tg:-3.14e+100
 E,u:-3.14e+100
 E,ud:-3.14e+100
 E,ug:-3.14e+100
 E,uj:-3.14e+100
 E,ul:-3.14e+100
 E,uv:-3.14e+100
 E,uz:-3.14e+100
 E,v:-3.14e+100
 E,vd:-3.14e+100
 E,vg:-3.14e+100
 E,vi:-3.14e+100
 E,vn:-3.14e+100
 E,vq:-3.14e+100
 E,w:-3.14e+100
 E,x:-3.14e+100
 E,y:-3.14e+100
 E,yg:-3.14e+100
 E,z:-3.14e+100
 E,zg:-3.14e+100
 M,a:-3.14e+100
 M,ad:-3.14e+100
 M,ag:-3.14e+100
 M,an:-3.14e+100
 M,b:-3.14e+100
 M,bg:-3.14e+100
 M,c:-3.14e+100
 M,d:-3.14e+100
 M,df:-3.14e+100
 M,dg:-3.14e+100
 M,e:-3.14e+100
 M,en:-3.14e+100
 M,f:-3.14e+100
 M,g:-3.14e+100
 M,h:-3.14e+100
 M,i:-3.14e+100
 M,in:-3.14e+100
 M,j:-3.14e+100
 M,jn:-3.14e+100
 M,k:-3.14e+100
 M,l:-3.14e+100
 M,ln:-3.14e+100
 M,m:-3.14e+100
 M,mg:-3.14e+100
 M,mq:-3.14e+100
 M,n:-3.14e+100
 M,ng:-3.14e+100
 M,nr:-3.14e+100
 M,nrfg:-3.14e+100
 M,nrt:-3.14e+100
 M,ns:-3.14e+100
 M,nt:-3.14e+100
 M,nz:-3.14e+100
 M,o:-3.14e+100
 M,p:-3.14e+100
 M,q:-3.14e+100
 M,qe:-3.14e+100
 M,qg:-3.14e+100
 M,r:-3.14e+100
 M,rg:-3.14e+100
 M,rr:-3.14e+100
 M,rz:-3.14e+100
 M,s:-3.14e+100
 M,t:-3.14e+100
 M,tg:-3.14e+100
 M,u:-3.14e+100
 M,ud:-3.14e+100
 M,ug:-3.14e+100
 M,uj:-3.14e+100
 M,ul:-3.14e+100
 M,uv:-3.14e+100
 M,uz:-3.14e+100
 M,v:-3.14e+100
 M,vd:-3.14e+100
 M,vg:-3.14e+100
 M,vi:-3.14e+100
 M,vn:-3.14e+100
 M,vq:-3.14e+100
 M,w:-3.14e+100
 M,x:-3.14e+100
 M,y:-3.14e+100
 M,yg:-3.14e+100
 M,z:-3.14e+100
 M,zg:-3.14e+100
 S,a:-3.90253968313
 S,ad:-11.0484584802
 S,ag:-6.95411391796
 S,an:-12.8402179494
 S,b:-6.47288876397
 S,bg:-3.14e+100
 S,c:-4.78696679586
 S,d:-3.90391976418
 S,df:-3.14e+100
 S,dg:-8.9483976513
 S,e:-5.94251300628
 S,en:-3.14e+100
 S,f:-5.19482024998
 S,g:-6.50782681533
 S,h:-8.65056320738
 S,i:-3.14e+100
 S,in:-3.14e+100
 S,j:-4.91199211964
 S,jn:-3.14e+100
 S,k:-6.94032059583
 S,l:-3.14e+100
 S,ln:-3.14e+100
 S,m:-3.26920065212
 S,mg:-10.8253149289
 S,mq:-3.14e+100
 S,n:-3.85514838976
 S,ng:-4.9134348611
 S,nr:-4.48366310396
 S,nrfg:-3.14e+100
 S,nrt:-3.14e+100
 S,ns:-3.14e+100
 S,nt:-12.1470707689
 S,nz:-3.14e+100
 S,o:-8.46446092775
 S,p:-2.98684018136
 S,q:-4.88865861826
 S,qe:-3.14e+100
 S,qg:-3.14e+100
 S,r:-2.76353367841
 S,rg:-10.2752685919
 S,rr:-3.14e+100
 S,rz:-3.14e+100
 S,s:-3.14e+100
 S,t:-3.14e+100
 S,tg:-6.27284253188
 S,u:-6.94032059583
 S,ud:-7.72823016105
 S,ug:-7.53940370266
 S,uj:-6.85251045118
 S,ul:-8.41537131755
 S,uv:-8.15808672229
 S,uz:-9.29925862537
 S,v:-3.05329230341
 S,vd:-3.14e+100
 S,vg:-5.94301818437
 S,vi:-3.14e+100
 S,vn:-11.4539235883
 S,vq:-3.14e+100
 S,w:-3.14e+100
 S,x:-8.42741965607
 S,y:-6.19707946995
 S,yg:-13.53336513
 S,z:-3.14e+100
 S,zg:-3.14e+100
--- a/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
--- a/libchinese-segmentation/dict/stop_words.utf8
+++ b/libchinese-segmentation/dict/stop_words.utf8
--- a/libchinese-segmentation/dict/user.dict.utf8
+++ b/libchinese-segmentation/dict/user.dict.utf8
@ -1,4 +0,0 @@
 云计算
 韩玉鉴赏
 蓝翔 nz
 区块链 10 nz
--- a/libchinese-segmentation/hanzi-to-pinyin-private.h
+++ b/libchinese-segmentation/hanzi-to-pinyin-private.h
@ -1,29 +0,0 @@
 #ifndef HANZITOPINYINPRIVATE_H
 #define HANZITOPINYINPRIVATE_H
 #include <QtCore/qglobal.h>
 #include "cppjieba/PinYinTrie.hpp"
 #include "hanzi-to-pinyin.h"
 #define PINYINMANAGER_EXPORT Q_DECL_IMPORT
 using namespace std;
 class PINYINMANAGER_EXPORT HanZiToPinYinPrivate
 {
 public:
    HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr);
    ~HanZiToPinYinPrivate();
 public:
    template <typename T>
    bool isMultiTone(T &&t) {return m_pinYinTrie->isMultiTone(std::forward<T>(t));}
    bool contains(string &word);
    int getResults(string word, QStringList &results);
 private:
    cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
    HanZiToPinYin *q = nullptr;
 };
 #endif // HANZITOPINYINPRIVATE_H
--- a/libchinese-segmentation/hanzi-to-pinyin.cpp
+++ b/libchinese-segmentation/hanzi-to-pinyin.cpp
@ -1,83 +0,0 @@
 #include "hanzi-to-pinyin.h"
 #include "hanzi-to-pinyin-private.h"
 #include <mutex>
 HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr;
 std::once_flag g_singleFlag;
 // Returns whatever the pinyin trie reports for contains(word).
 bool HanZiToPinYinPrivate::contains(string &word)
 {
    return m_pinYinTrie->contains(word);
 }
 // Looks up the pinyin of `word`. `results` is cleared first. The multi-tone
 // lookup is tried first; if it reports -1 the single-tone lookup is tried and
 // its result appended. Returns 0 when either lookup succeeds, -1 otherwise.
 int HanZiToPinYinPrivate::getResults(string word, QStringList &results)
 {
    results.clear();

    const bool multiToneFound = (m_pinYinTrie->getMultiTonResults(word, results) != -1);
    if (multiToneFound) {
        return 0;
    }

    QString singleTone;
    if (m_pinYinTrie->getSingleTonResult(word, singleTone) == -1) {
        return -1;
    }
    results.append(singleTone);
    return 0;
 }
 // Stores the owning public object in q and builds the pinyin trie from the
 // dictionary file at its installed location.
 HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent)
 {
    const char * const dictFile = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
    m_pinYinTrie = new cppjieba::PinYinTrie(dictFile);
 }
 // Releases the pinyin trie created by the constructor.
 HanZiToPinYinPrivate::~HanZiToPinYinPrivate()
 {
    // Deleting a null pointer is a no-op, so no explicit null check is needed.
    delete m_pinYinTrie;
    m_pinYinTrie = nullptr;
 }
 // Returns the process-wide HanZiToPinYin instance. The instance is created
 // on the first call; std::call_once with g_singleFlag ensures the creation
 // runs only once.
 HanZiToPinYin * HanZiToPinYin::getInstance()
 {
    std::call_once(g_singleFlag, [] {
        g_pinYinManager = new HanZiToPinYin;
    });
    return g_pinYinManager;
 }
 // Public entry point; delegates to the private implementation's contains().
 bool HanZiToPinYin::contains(string &word)
 {
    return d->contains(word);
 }
 // The four overloads below accept every value category / constness of
 // std::string and all delegate to the private implementation's isMultiTone
 // template.
 bool HanZiToPinYin::isMultiTone(string &word)
 {
    return d->isMultiTone(word);
 }
 // `word` is a named parameter and therefore an lvalue inside the body, so it
 // is passed on as an lvalue here as well.
 bool HanZiToPinYin::isMultiTone(string &&word)
 {
    return d->isMultiTone(word);
 }
 bool HanZiToPinYin::isMultiTone(const string &word)
 {
    return d->isMultiTone(word);
 }
 // As with the string&& overload, `word` is passed on as a (const) lvalue.
 bool HanZiToPinYin::isMultiTone(const string &&word)
 {
    return d->isMultiTone(word);
 }
 // Public entry point; delegates to the private implementation's getResults()
 // and returns its status code unchanged.
 int HanZiToPinYin::getResults(string word, QStringList &results)
 {
    return d->getResults(word, results);
 }
 // Creates the private implementation object. `this` is handed over so that
 // the private object's back-pointer (q) refers to its public owner instead
 // of staying null; the private constructor only stores the pointer.
 HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate(this))
 {
 }
--- a/libchinese-segmentation/hanzi-to-pinyin.h
+++ b/libchinese-segmentation/hanzi-to-pinyin.h
@ -1,53 +0,0 @@
 #ifndef HANZITOPINYIN_H
 #define HANZITOPINYIN_H
 #include <QtCore/qglobal.h>
 //#include "cppjieba/PinYinTrie.hpp"
 #include <QStringList>
 #define PINYINMANAGER_EXPORT Q_DECL_IMPORT
 using namespace std;
 class HanZiToPinYinPrivate;
 /**
  * @brief Hanzi-to-pinyin lookup facade.
  *
  * Obtained through getInstance(); the constructor and destructor are
  * protected and copying is deleted. All queries are forwarded to a
  * HanZiToPinYinPrivate object held in d.
  */
 class PINYINMANAGER_EXPORT HanZiToPinYin
 {
 public:
    /** @brief Returns the shared HanZiToPinYin instance. */
    static HanZiToPinYin * getInstance();
 public:
    /**
     * @brief HanZiToPinYin::isMultiTone Checks whether a character is polyphonic, i.e. has more than one reading (single characters only)
     * @param word the character to check
     * @return bool false if the character is not polyphonic or the input is not a single character
     */
    bool isMultiTone(string &word);
    bool isMultiTone(string &&word);
    bool isMultiTone(const string &word);
    bool isMultiTone(const string &&word);
    /**
     * @brief HanZiToPinYin::contains Checks whether a character has a pinyin entry (i.e. is present in the database; single characters only)
     * @param word the character to look up
     * @return bool false if the database does not contain it or the input is not a single character
     */
    bool contains(string &word);
    /**
     * @brief HanZiToPinYin::getResults Retrieves the pinyin of a character (single characters only)
     * @param word the character whose pinyin is requested
     * @param results list of pinyin readings for word (may hold several for a polyphonic character); cleared on every call
     * @return int 0 if a result was found, -1 otherwise
     */
    int getResults(string word, QStringList &results);
 protected:
    HanZiToPinYin();
    ~HanZiToPinYin();
    HanZiToPinYin(const HanZiToPinYin&) = delete;
    HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
 private:
    static HanZiToPinYin *g_pinYinManager;  // instance handed out by getInstance()
    HanZiToPinYinPrivate *d = nullptr;      // private implementation object
 };
 #endif // HANZITOPINYIN_H
--- a/libchinese-segmentation/libchinese-segmentation.pro
+++ b/libchinese-segmentation/libchinese-segmentation.pro
@ -1,84 +0,0 @@
 QT -= gui
 VERSION = 1.0.0
 TARGET =  chinese-segmentation
 TEMPLATE = lib
 DEFINES += LIBCHINESESEGMENTATION_LIBRARY
 CONFIG += c++11 create_pc create_prl no_install_prl
 # The following define makes your compiler emit warnings if you use
 # any Qt feature that has been marked deprecated (the exact warnings
 # depend on your compiler). Please consult the documentation of the
 # deprecated API in order to know how to port your code away from it.
 DEFINES += QT_DEPRECATED_WARNINGS
 QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
 #QMAKE_CXXFLAGS += -Werror=uninitialized
 QMAKE_CXXFLAGS += -execution-charset:utf-8
 # You can also make your code fail to compile if it uses deprecated APIs.
 # In order to do so, uncomment the following line.
 # You can also select to disable deprecated APIs only up to a certain version of Qt.
 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
 include(cppjieba/cppjieba.pri)
 #LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
 SOURCES += \
    chinese-segmentation.cpp \
    hanzi-to-pinyin.cpp
 HEADERS += \
    chinese-segmentation-private.h \
    chinese-segmentation.h \
    common-struct.h \
    hanzi-to-pinyin-private.h \
    hanzi-to-pinyin.h \
    libchinese-segmentation_global.h
 # Dictionary files installed for use at run time. Each statement ends on its
 # own line: a trailing backslash would splice the following statement into
 # the file list as literal tokens ("dict_files.files", "+=", ...).
 dict_files.path = /usr/share/ukui-search/res/dict/
 dict_files.files = $$PWD/dict/*.utf8
 dict_files.files += $$PWD/dict/pos_dict/*.utf8
 dict_files.files += $$PWD/dict/*.txt
 INSTALLS += dict_files
 # Default rules for deployment.
 # On unix: install the library into Qt's library directory, fill in the
 # pkg-config metadata (CONFIG contains create_pc) and install the public
 # headers under /usr/include/chinese-seg.
 unix {
    target.path = $$[QT_INSTALL_LIBS]
    QMAKE_PKGCONFIG_NAME = chinese-segmentation
    QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
    QMAKE_PKGCONFIG_VERSION = $$VERSION
    QMAKE_PKGCONFIG_LIBDIR = $$target.path
    QMAKE_PKGCONFIG_DESTDIR = pkgconfig
    QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
    QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
 # Only register the library for installation when an install path is set.
 !isEmpty(target.path): INSTALLS += target
    # Public headers plus the extension-less forwarding headers.
    header.path = /usr/include/chinese-seg
    header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h
    header.files += development-files/header-files/*
 #    headercppjieba.path = /usr/include/chinese-seg/cppjieba/
 #    headercppjieba.files = cppjieba/*
    INSTALLS += header
 }
 #DISTFILES += \
 #    jiaba/jieba.pri
 DISTFILES += \
    dict/README.md \
    dict/hmm_model.utf8 \
    dict/idf.utf8 \
    dict/jieba.dict.utf8 \
    dict/pos_dict/char_state_tab.utf8 \
    dict/pos_dict/prob_emit.utf8 \
    dict/pos_dict/prob_start.utf8 \
    dict/pos_dict/prob_trans.utf8 \
    dict/stop_words.utf8 \
    dict/user.dict.utf8 \
    dict/pinyinWithoutTone.txt \
    development-files/header-files/* \
--- a/libchinese-segmentation/libchinese-segmentation_global.h
+++ b/libchinese-segmentation/libchinese-segmentation_global.h
@ -1,32 +0,0 @@
 /*
 * Copyright (C) 2020, KylinSoft Co., Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Authors: zhangzihao <zhangzihao@kylinos.cn>
 * Modified by: zhangpengfei <zhangpengfei@kylinos.cn>
 *
 */
 #ifndef CHINESESEGMENTATION_GLOBAL_H
 #define CHINESESEGMENTATION_GLOBAL_H
 #include <QtCore/qglobal.h>
 // Export the symbols while the library itself is being built, import them
 // otherwise. The qmake project defines LIBCHINESESEGMENTATION_LIBRARY for the
 // library build, so that name must be tested here; the name this header
 // checked before (CHINESESEGMENTATION_LIBRARY) is still honoured so existing
 // build setups keep working.
 #if defined(LIBCHINESESEGMENTATION_LIBRARY) || defined(CHINESESEGMENTATION_LIBRARY)
 #  define CHINESESEGMENTATION_EXPORT Q_DECL_EXPORT
 #else
 #  define CHINESESEGMENTATION_EXPORT Q_DECL_IMPORT
 #endif
 #endif // CHINESESEGMENTATION_GLOBAL_H
		`@ -0,0 +1 @@`
							`Subproject commit 02216728e0cf1f1304e97f7fc1f7b56f4ddc5872`