Merge branch 'main' into 1231-dev

2021-01-01 10:10:33 +08:00 · 2021-01-01 10:10:33 +08:00 · 7388ea19b0
parent c2d678492c 5a3e5dd370
commit 7388ea19b0
68 changed files with 626172 additions and 60 deletions
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@ -1,5 +1,45 @@
 #include "chinese-segmentation.h"
+#include <QFileInfo>

 ChineseSegmentation::ChineseSegmentation()
 {
 }
+
+/**
+ * @brief Extracts weighted keywords from @p str with cppjieba's keyword extractor.
+ *
+ * The cppjieba::Jieba object is created once, on the first call, and reused
+ * afterwards: its constructor reads all five dictionary files, which is far
+ * too expensive to repeat for every string.
+ * NOTE(review): all callers now share one Jieba instance; only the const
+ * Extract() path is used here - confirm that is safe for concurrent callers.
+ *
+ * @param str Text to analyse; a null pointer yields an empty result.
+ * @return One SKeyWord (word, offsets, weight) per keyword reported by Extract().
+ */
+QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
+{
+    QVector<SKeyWord> vecNeeds;
+    if (str == nullptr) {
+        return vecNeeds;
+    }
+
+    const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
+    const char * const  HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
+    const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
+    const char * const  IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
+    const char * const  STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
+
+    // Function-local static: constructed exactly once, on first use.
+    static cppjieba::Jieba jieba(DICT_PATH,
+           HMM_PATH,
+           USER_DICT_PATH,
+           IDF_PATH,
+           STOP_WORD_PATH);
+
+    const std::string s = str->toStdString();
+
+    // The largest size_t: Extract() clamps topN to the number of keywords found,
+    // so this asks for all of them.
+    const size_t topk = static_cast<size_t>(-1);
+    std::vector<cppjieba::KeywordExtractor::Word> keywordres;
+    jieba.extractor.Extract(s, keywordres, topk);
+    convert(keywordres, vecNeeds);
+
+    return vecNeeds;
+}
+
+/**
+ * @brief Appends every cppjieba keyword in @p keywordres to @p kw as an SKeyWord.
+ *
+ * @param keywordres Keywords produced by cppjieba::KeywordExtractor::Extract(); not modified.
+ * @param kw Destination vector; existing elements are kept and new ones are appended.
+ */
+void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw)
+{
+    kw.reserve(kw.size() + static_cast<int>(keywordres.size()));
+    // Iterate by const reference: each Word owns a string and a vector, so a
+    // by-value loop variable would deep-copy both on every iteration.
+    for (const auto &item : keywordres) {
+        SKeyWord temp;
+        temp.word = item.word;
+        temp.offsets = QVector<size_t>::fromStdVector(item.offsets);
+        temp.weight = item.weight;
+        kw.append(temp);
+    }
+}
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@ -2,10 +2,27 @@
 #define CHINESESEGMENTATION_H

 #include "libchinese-segmentation_global.h"
+#include "cppjieba/Jieba.hpp"
+//#include "Logging.hpp"
+//#include "LocalVector.hpp"
+//#include "cppjieba/QuerySegment.hpp"
+#include "cppjieba/KeywordExtractor.hpp"
+#include <QVector>
+#include <QString>
+#include <QDebug>
+
+struct SKeyWord{ // One extracted keyword; filled by ChineseSegmentation::convert().
+    std::string word; // keyword text, copied from cppjieba::KeywordExtractor::Word::word
+    QVector<size_t> offsets; // copied from cppjieba::KeywordExtractor::Word::offsets
+    double weight; // copied from cppjieba::KeywordExtractor::Word::weight
+};

 class CHINESESEGMENTATION_EXPORT ChineseSegmentation // static entry points to the cppjieba keyword extractor
 {
 public:
+    static QVector<SKeyWord> callSegement(QString *str); // extracts keywords from the text *str points to
+    static void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw); // appends each entry of keywordres to kw
+private:
    ChineseSegmentation(); // private: both public members are static
 };

--- a/libchinese-segmentation/cppjieba/DictTrie.hpp
+++ b/libchinese-segmentation/cppjieba/DictTrie.hpp
@ -0,0 +1,277 @@
+#ifndef CPPJIEBA_DICT_TRIE_HPP
+#define CPPJIEBA_DICT_TRIE_HPP
+
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <cmath>
+#include <limits>
+#include "limonp/StringUtil.hpp"
+#include "limonp/Logging.hpp"
+#include "Unicode.hpp"
+#include "Trie.hpp"
+
+namespace cppjieba {
+
+using namespace limonp;
+
+const double MIN_DOUBLE = -3.14e+100;
+const double MAX_DOUBLE = 3.14e+100;
+const size_t DICT_COLUMN_NUM = 3;
+const char* const UNKNOWN_TAG = "";
+
+class DictTrie {
+ public:
+  enum UserWordWeightOption {
+    WordWeightMin,
+    WordWeightMedian,
+    WordWeightMax,
+  }; // enum UserWordWeightOption
+
+  DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
+    Init(dict_path, user_dict_paths, user_word_weight_opt);
+  }
+
+  ~DictTrie() {
+    delete trie_;
+  }
+
+  // Adds `word` to the trie at run time using the default user-word weight.
+  // The unit is stored in active_node_infos_ and the trie is given the address
+  // of that stored copy. Returns false when `word` cannot be decoded.
+  bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
+    DictUnit unit;
+    const bool decoded = MakeNodeInfo(unit, word, user_word_default_weight_, tag);
+    if (decoded) {
+      active_node_infos_.push_back(unit);
+      DictUnit& stored = active_node_infos_.back();
+      trie_->InsertNode(stored.word, &stored);
+    }
+    return decoded;
+  }
+
+  // Adds `word` to the trie at run time with a weight derived from `freq`.
+  // A positive freq becomes log(freq / freq_sum_). Zero or negative values,
+  // for which log() has no finite real result, fall back to the default
+  // user-word weight instead of producing -inf/NaN.
+  // Returns false when `word` cannot be decoded.
+  bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
+    DictUnit node_info;
+    double weight = freq > 0 ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
+    if (!MakeNodeInfo(node_info, word, weight , tag)) {
+      return false;
+    }
+    active_node_infos_.push_back(node_info);
+    // Hand the trie the address of the stored copy, not of the local.
+    trie_->InsertNode(node_info.word, &active_node_infos_.back());
+    return true;
+  }
+
+  const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
+    return trie_->Find(begin, end);
+  }
+
+  void Find(RuneStrArray::const_iterator begin, 
+        RuneStrArray::const_iterator end, 
+        vector<struct Dag>&res,
+        size_t max_word_len = MAX_WORD_LENGTH) const {
+    trie_->Find(begin, end, res, max_word_len);
+  }
+
+  // Returns true when `word`, decoded into runes, is present in the trie.
+  // A string that cannot be decoded is logged and reported as not found,
+  // instead of being looked up with whatever `runes` holds after the failure.
+  bool Find(const string& word)
+  {
+    RuneStrArray runes;
+    if (!DecodeRunesInString(word, runes))
+    {
+      XLOG(ERROR) << "Decode failed.";
+      return false;
+    }
+    return Find(runes.begin(), runes.end()) != NULL;
+  }
+
+  bool IsUserDictSingleChineseWord(const Rune& word) const {
+    return IsIn(user_dict_single_chinese_word_, word);
+  }
+
+  double GetMinWeight() const {
+    return min_weight_;
+  }
+
+  // Parses one user-dictionary line and appends the word to static_node_infos_.
+  // Accepted space-separated layouts: "word", "word tag", "word freq tag".
+  // Lines with any other column count, or whose word cannot be decoded, are
+  // skipped instead of being stored as a DictUnit with an unset weight.
+  void InserUserDictNode(const string& line) {
+    vector<string> buf;
+    DictUnit node_info;
+    Split(line, buf, " ");
+    bool ok = false;
+    if (buf.size() == 1) {
+      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, UNKNOWN_TAG);
+    } else if (buf.size() == 2) {
+      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, buf[1]);
+    } else if (buf.size() == 3) {
+      int freq = atoi(buf[1].c_str());
+      assert(freq_sum_ > 0.0);
+      // log() of a non-positive frequency is -inf/NaN; use the default weight then.
+      double weight = freq > 0 ? log(1.0 * freq / freq_sum_) : user_word_default_weight_;
+      ok = MakeNodeInfo(node_info, buf[0], weight, buf[2]);
+    } else {
+      XLOG(ERROR) << "illegal user dict line: " << line;
+    }
+    if (!ok) {
+      return;
+    }
+    static_node_infos_.push_back(node_info);
+    if (node_info.word.size() == 1) {
+      user_dict_single_chinese_word_.insert(node_info.word[0]);
+    }
+  }
+  
+  // Passes every string in `buf` to InserUserDictNode() as one dictionary line.
+  // NOTE(review): InserUserDictNode() only appends to static_node_infos_ and
+  // this function does not touch trie_ - confirm what callers expect when it
+  // is used after the trie has been built.
+  void LoadUserDict(const vector<string>& buf) {
+    for (vector<string>::const_iterator it = buf.begin(); it != buf.end(); ++it) {
+      InserUserDictNode(*it);
+    }
+  }
+
+   // Passes every string in `buf` to InserUserDictNode(), in the set's iteration order.
+   void LoadUserDict(const set<string>& buf) {
+    std::set<string>::const_iterator entry = buf.begin();
+    while (entry != buf.end()) {
+      InserUserDictNode(*entry);
+      ++entry;
+    }
+  }
+
+  // Loads one or more user-dictionary files. `filePaths` is split on '|' and
+  // ';'; each file must open (XCHECK) and every non-empty line is passed to
+  // InserUserDictNode().
+  void LoadUserDict(const string& filePaths) {
+    vector<string> files = limonp::Split(filePaths, "|;");
+    for (vector<string>::const_iterator path = files.begin(); path != files.end(); ++path) {
+      ifstream ifs(path->c_str());
+      XCHECK(ifs.is_open()) << "open " << *path << " failed"; 
+      string line;
+      while (getline(ifs, line)) {
+        if (!line.empty()) {
+          InserUserDictNode(line);
+        }
+      }
+    }
+  }
+
+
+ private:
+  void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { // Builds the dictionary and the trie; the order of the steps matters.
+    LoadDict(dict_path); // fills static_node_infos_; .weight holds the numeric second column of each line
+    freq_sum_ = CalcFreqSum(static_node_infos_); // must run before the weights are overwritten on the next line
+    CalculateWeight(static_node_infos_, freq_sum_); // weight := log(weight / freq_sum_)
+    SetStaticWordWeights(user_word_weight_opt); // sets min/median/max weights and the default weight for user words
+
+    if (user_dict_paths.size()) {
+      LoadUserDict(user_dict_paths); // appends user entries after the weights above are final
+    }
+    Shrink(static_node_infos_); // copy-and-swap of the vector
+    CreateTrie(static_node_infos_); // the trie receives pointers to the elements of static_node_infos_
+  }
+  
+  // Builds trie_ from dictUnits: the keys are the units' words and the values
+  // are pointers to the units themselves (not copies).
+  void CreateTrie(const vector<DictUnit>& dictUnits) {
+    assert(dictUnits.size());
+    const size_t count = dictUnits.size();
+    vector<Unicode> keys;
+    vector<const DictUnit*> values;
+    keys.reserve(count);
+    values.reserve(count);
+    for (size_t idx = 0; idx < count; ++idx) {
+      const DictUnit& unit = dictUnits[idx];
+      keys.push_back(unit.word);
+      values.push_back(&unit);
+    }
+
+    trie_ = new Trie(keys, values);
+  }
+
+  
+
+
+  // Fills node_info from a UTF-8 word, a weight and a tag.
+  // When the word cannot be decoded an error is logged, weight and tag are
+  // left untouched, and false is returned.
+  bool MakeNodeInfo(DictUnit& node_info,
+        const string& word, 
+        double weight, 
+        const string& tag) {
+    const bool decoded = DecodeRunesInString(word, node_info.word);
+    if (decoded) {
+      node_info.weight = weight;
+      node_info.tag = tag;
+    } else {
+      XLOG(ERROR) << "Decode " << word << " failed.";
+    }
+    return decoded;
+  }
+
+  // Reads the main dictionary into static_node_infos_, one entry per line.
+  // The file must open and every line must split into exactly DICT_COLUMN_NUM
+  // space-separated columns (both enforced with XCHECK). A line whose word
+  // fails to decode is skipped: MakeNodeInfo() leaves weight and tag untouched
+  // in that case, so storing the unit would repeat the previous entry's values.
+  void LoadDict(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
+    string line;
+    vector<string> buf;
+
+    DictUnit node_info;
+    while (getline(ifs, line)) {
+      Split(line, buf, " ");
+      XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
+      if (!MakeNodeInfo(node_info,
+            buf[0],
+            atof(buf[1].c_str()),
+            buf[2])) {
+        continue;
+      }
+      static_node_infos_.push_back(node_info);
+    }
+  }
+
+  static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
+    return lhs.weight < rhs.weight;
+  }
+
+  // Derives min_weight_, max_weight_ and median_weight_ from static_node_infos_
+  // and picks user_word_default_weight_ according to `option`.
+  // Only the weights are copied and the median is found with nth_element, so
+  // the DictUnit vector is neither duplicated nor fully sorted. The values are
+  // the same as elements [0], [size/2] and [size-1] of the list sorted by weight.
+  void SetStaticWordWeights(UserWordWeightOption option) {
+    XCHECK(!static_node_infos_.empty());
+    vector<double> weights;
+    weights.reserve(static_node_infos_.size());
+    for (size_t i = 0; i < static_node_infos_.size(); i++) {
+      weights.push_back(static_node_infos_[i].weight);
+    }
+    min_weight_ = *std::min_element(weights.begin(), weights.end());
+    max_weight_ = *std::max_element(weights.begin(), weights.end());
+    vector<double>::iterator mid = weights.begin() + weights.size() / 2;
+    std::nth_element(weights.begin(), mid, weights.end());
+    median_weight_ = *mid;
+    switch (option) {
+     case WordWeightMin:
+       user_word_default_weight_ = min_weight_;
+       break;
+     case WordWeightMedian:
+       user_word_default_weight_ = median_weight_;
+       break;
+     default:
+       user_word_default_weight_ = max_weight_;
+       break;
+    }
+  }
+
+  // Returns the sum of the weight fields of all units in node_infos,
+  // added in index order (0.0 for an empty vector).
+  double CalcFreqSum(const vector<DictUnit>& node_infos) const {
+    double total = 0.0;
+    for (vector<DictUnit>::const_iterator it = node_infos.begin(); it != node_infos.end(); ++it) {
+      total += it->weight;
+    }
+    return total;
+  }
+
+  // Replaces each unit's weight w with log(w / sum), in place.
+  // `sum` and every weight are asserted to be strictly positive.
+  void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
+    assert(sum > 0.0);
+    for (vector<DictUnit>::iterator it = node_infos.begin(); it != node_infos.end(); ++it) {
+      DictUnit& node_info = *it;
+      assert(node_info.weight > 0.0);
+      const double ratio = double(node_info.weight)/sum;
+      node_info.weight = log(ratio);
+    }
+  }
+
+  void Shrink(vector<DictUnit>& units) const {
+    vector<DictUnit>(units.begin(), units.end()).swap(units);
+  }
+
+  vector<DictUnit> static_node_infos_;
+  deque<DictUnit> active_node_infos_; // must not be vector
+  Trie * trie_;
+
+  double freq_sum_;
+  double min_weight_;
+  double max_weight_;
+  double median_weight_;
+  double user_word_default_weight_;
+  unordered_set<Rune> user_dict_single_chinese_word_;
+};
+}
+
+#endif
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@ -0,0 +1,93 @@
+#ifndef CPPJIEBA_FULLSEGMENT_H
+#define CPPJIEBA_FULLSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "DictTrie.hpp"
+#include "SegmentBase.hpp"
+#include "Unicode.hpp"
+
+namespace cppjieba {
+class FullSegment: public SegmentBase {
+ public:
+  FullSegment(const string& dictPath) {
+    dictTrie_ = new DictTrie(dictPath);
+    isNeedDestroy_ = true;
+  }
+  FullSegment(const DictTrie* dictTrie)
+    : dictTrie_(dictTrie), isNeedDestroy_(false) {
+    assert(dictTrie_);
+  }
+  ~FullSegment() {
+    if (isNeedDestroy_) {
+      delete dictTrie_;
+    }
+  }
+  void Cut(const string& sentence, 
+        vector<string>& words) const {
+    vector<Word> tmp;
+    Cut(sentence, tmp);
+    GetStringsFromWords(tmp, words);
+  }
+  // Cuts `sentence` into Word objects. PreFilter splits the sentence into
+  // ranges, each range is cut by the iterator overload, and the collected
+  // ranges are converted into `words` (whose previous content is discarded).
+  void Cut(const string& sentence, 
+        vector<Word>& words) const {
+    vector<WordRange> ranges;
+    ranges.reserve(sentence.size()/2);
+    for (PreFilter pre_filter(symbols_, sentence); pre_filter.HasNext(); ) {
+      PreFilter::Range piece;
+      piece = pre_filter.Next();
+      Cut(piece.begin, piece.end, ranges);
+    }
+    words.clear();
+    words.reserve(ranges.size());
+    GetWordsFromWordRanges(sentence, ranges, words);
+  }
+  void Cut(RuneStrArray::const_iterator begin, // Appends to res the word ranges selected from the dictionary DAG of [begin, end).
+        RuneStrArray::const_iterator end, 
+        vector<WordRange>& res) const {
+    // result of searching in trie tree (declared but not used anywhere in this function)
+    LocalVector<pair<size_t, const DictUnit*> > tRes;
+
+    // largest value of uIdx + wordLen seen so far
+    size_t maxIdx = 0;
+
+    // always equal to i, the index of the current entry of dags (incremented once per outer iteration)
+    size_t uIdx = 0;
+
+    // tmp variables
+    size_t wordLen = 0; // NOTE(review): only assigned when du != NULL, so a NULL edge reuses the previous value in the maxIdx update - confirm intended
+    assert(dictTrie_);
+    vector<struct Dag> dags;
+    dictTrie_->Find(begin, end, dags); // presumably one Dag per input rune - TODO confirm against Trie::Find
+    for (size_t i = 0; i < dags.size(); i++) {
+      for (size_t j = 0; j < dags[i].nexts.size(); j++) {
+        size_t nextoffset = dags[i].nexts[j].first;
+        assert(nextoffset < dags.size());
+        const DictUnit* du = dags[i].nexts[j].second;
+        if (du == NULL) { // candidate without a dictionary entry
+          if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) { // emitted only when it is the sole candidate and maxIdx has not passed this position
+            WordRange wr(begin + i, begin + nextoffset);
+            res.push_back(wr);
+          }
+        } else {
+          wordLen = du->word.size();
+          if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) { // words of 2+ runes are always emitted; shorter ones only under the same condition as above
+            WordRange wr(begin + i, begin + nextoffset);
+            res.push_back(wr);
+          }
+        }
+        maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx; // i.e. max(maxIdx, uIdx + wordLen)
+      }
+      uIdx++;
+    }
+  }
+ private:
+  const DictTrie* dictTrie_;
+  bool isNeedDestroy_;
+};
+}
+
+#endif
--- a/libchinese-segmentation/cppjieba/HMMModel.hpp
+++ b/libchinese-segmentation/cppjieba/HMMModel.hpp
@ -0,0 +1,129 @@
+#ifndef CPPJIEBA_HMMMODEL_H
+#define CPPJIEBA_HMMMODEL_H
+
+#include "limonp/StringUtil.hpp"
+#include "Trie.hpp"
+
+namespace cppjieba {
+
+using namespace limonp;
+typedef unordered_map<Rune, double> EmitProbMap;
+
+struct HMMModel {
+  /*
+   * STATUS:
+   * 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
+   * */
+  enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
+
+  HMMModel(const string& modelPath) {
+    memset(startProb, 0, sizeof(startProb));
+    memset(transProb, 0, sizeof(transProb));
+    statMap[0] = 'B';
+    statMap[1] = 'E';
+    statMap[2] = 'M';
+    statMap[3] = 'S';
+    emitProbVec.push_back(&emitProbB);
+    emitProbVec.push_back(&emitProbE);
+    emitProbVec.push_back(&emitProbM);
+    emitProbVec.push_back(&emitProbS);
+    LoadModel(modelPath);
+  }
+  ~HMMModel() {
+  }
+  void LoadModel(const string& filePath) { // Reads startProb, transProb and the four emit maps from filePath; every step is guarded by XCHECK.
+    ifstream ifile(filePath.c_str());
+    XCHECK(ifile.is_open()) << "open " << filePath << " failed";
+    string line;
+    vector<string> tmp;
+    vector<string> tmp2; // not used in this function
+    //Load startProb: one line holding STATUS_SUM space-separated numbers
+    XCHECK(GetLine(ifile, line)); // GetLine() skips blank lines and lines starting with '#'
+    Split(line, tmp, " ");
+    XCHECK(tmp.size() == STATUS_SUM);
+    for (size_t j = 0; j< tmp.size(); j++) {
+      startProb[j] = atof(tmp[j].c_str());
+    }
+
+    //Load transProb: STATUS_SUM lines, each holding STATUS_SUM space-separated numbers
+    for (size_t i = 0; i < STATUS_SUM; i++) {
+      XCHECK(GetLine(ifile, line));
+      Split(line, tmp, " ");
+      XCHECK(tmp.size() == STATUS_SUM);
+      for (size_t j =0; j < STATUS_SUM; j++) {
+        transProb[i][j] = atof(tmp[j].c_str());
+      }
+    }
+
+    //Load emitProbB: a single line parsed by LoadEmitProb()
+    XCHECK(GetLine(ifile, line));
+    XCHECK(LoadEmitProb(line, emitProbB));
+
+    //Load emitProbE: a single line parsed by LoadEmitProb()
+    XCHECK(GetLine(ifile, line));
+    XCHECK(LoadEmitProb(line, emitProbE));
+
+    //Load emitProbM: a single line parsed by LoadEmitProb()
+    XCHECK(GetLine(ifile, line));
+    XCHECK(LoadEmitProb(line, emitProbM));
+
+    //Load emitProbS: a single line parsed by LoadEmitProb()
+    XCHECK(GetLine(ifile, line));
+    XCHECK(LoadEmitProb(line, emitProbS));
+  }
+  // Looks `key` up in *ptMp and returns its value, or defVal when the key is absent.
+  double GetEmitProb(const EmitProbMap* ptMp, Rune key, 
+        double defVal)const {
+    const EmitProbMap::const_iterator hit = ptMp->find(key);
+    return hit != ptMp->end() ? hit->second : defVal;
+  }
+  // Reads the next meaningful line from ifile into `line` (trimmed).
+  // Lines that are empty after trimming or start with '#' are skipped.
+  // Returns false when the stream runs out before such a line is found.
+  bool GetLine(ifstream& ifile, string& line) {
+    while (getline(ifile, line)) {
+      Trim(line);
+      const bool skippable = line.empty() || StartsWith(line, "#");
+      if (!skippable) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Parses one line of comma-separated "rune:value" pairs into `mp`.
+  // Returns false for an empty line, for a pair that does not split into
+  // exactly two parts on ':', or for a key that does not decode to exactly one
+  // rune. Pairs stored before the failing one remain in `mp`.
+  bool LoadEmitProb(const string& line, EmitProbMap& mp) {
+    if (line.empty()) {
+      return false;
+    }
+    vector<string> entries;
+    vector<string> keyValue;
+    Unicode decodedKey;
+    Split(line, entries, ",");
+    for (vector<string>::const_iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+      Split(*entry, keyValue, ":");
+      if (keyValue.size() != 2) {
+        XLOG(ERROR) << "emitProb illegal.";
+        return false;
+      }
+      const bool decoded = DecodeRunesInString(keyValue[0], decodedKey);
+      if (!decoded || decodedKey.size() != 1) {
+        XLOG(ERROR) << "TransCode failed.";
+        return false;
+      }
+      mp[decodedKey[0]] = atof(keyValue[1].c_str());
+    }
+    return true;
+  }
+
+  char statMap[STATUS_SUM];
+  double startProb[STATUS_SUM];
+  double transProb[STATUS_SUM][STATUS_SUM];
+  EmitProbMap emitProbB;
+  EmitProbMap emitProbE;
+  EmitProbMap emitProbM;
+  EmitProbMap emitProbS;
+  vector<EmitProbMap* > emitProbVec;
+}; // struct HMMModel
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@ -0,0 +1,190 @@
+#ifndef CPPJIBEA_HMMSEGMENT_H
+#define CPPJIBEA_HMMSEGMENT_H
+
+#include <iostream>
+#include <fstream>
+#include <memory.h>
+#include <cassert>
+#include "HMMModel.hpp"
+#include "SegmentBase.hpp"
+
+namespace cppjieba {
+class HMMSegment: public SegmentBase {
+ public:
+  HMMSegment(const string& filePath)
+  : model_(new HMMModel(filePath)), isNeedDestroy_(true) {
+  }
+  HMMSegment(const HMMModel* model) 
+  : model_(model), isNeedDestroy_(false) {
+  }
+  ~HMMSegment() {
+    if (isNeedDestroy_) {
+      delete model_;
+    }
+  }
+
+  void Cut(const string& sentence, 
+        vector<string>& words) const {
+    vector<Word> tmp;
+    Cut(sentence, tmp);
+    GetStringsFromWords(tmp, words);
+  }
+  // Cuts `sentence` into Word objects: PreFilter yields ranges, each range is
+  // cut by the iterator overload, and the resulting word ranges are converted
+  // into `words` (whose previous content is discarded).
+  void Cut(const string& sentence, 
+        vector<Word>& words) const {
+    vector<WordRange> ranges;
+    ranges.reserve(sentence.size()/2);
+    PreFilter pre_filter(symbols_, sentence);
+    for (PreFilter::Range piece; pre_filter.HasNext(); ) {
+      piece = pre_filter.Next();
+      Cut(piece.begin, piece.end, ranges);
+    }
+    words.clear();
+    words.reserve(ranges.size());
+    GetWordsFromWordRanges(sentence, ranges, words);
+  }
+  // Cuts [begin, end) into word ranges appended to `res`.
+  // Runs of runes >= 0x80 are handed to InternalCut(). A rune below 0x80
+  // starts a token taken by SequentialLetterRule(), else by NumbersRule(),
+  // else it is a token on its own.
+  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
+    RuneStrArray::const_iterator left = begin;
+    RuneStrArray::const_iterator right = begin;
+    while (right != end) {
+      if (right->rune >= 0x80) {
+        right++;
+        continue;
+      }
+      // Flush the pending run of runes >= 0x80 before handling this one.
+      if (left != right) {
+        InternalCut(left, right, res);
+      }
+      left = right;
+      right = SequentialLetterRule(left, end);
+      if (right == left) {
+        right = NumbersRule(left, end);
+      }
+      if (right == left) {
+        right ++;
+      }
+      res.push_back(WordRange(left, right - 1));
+      left = right;
+    }
+    if (left != right) {
+      InternalCut(left, right, res);
+    }
+  }
+ private:
+  // Sequential letters rule: when *begin is an ASCII letter, returns the
+  // position just past the following run of ASCII letters and digits;
+  // otherwise returns begin unchanged.
+  RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
+    Rune x = begin->rune;
+    const bool startsWithLetter = ('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z');
+    if (!startsWithLetter) {
+      return begin;
+    }
+    for (begin ++; begin != end; begin ++) {
+      x = begin->rune;
+      const bool letterOrDigit = ('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9');
+      if (!letterOrDigit) {
+        break;
+      }
+    }
+    return begin;
+  }
+  // Numbers rule: when *begin is an ASCII digit, returns the position just
+  // past the following run of digits and '.' characters; otherwise returns
+  // begin unchanged.
+  RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
+    Rune x = begin->rune;
+    const bool startsWithDigit = ('0' <= x && x <= '9');
+    if (!startsWithDigit) {
+      return begin;
+    }
+    for (begin ++; begin != end; begin ++) {
+      x = begin->rune;
+      const bool digitOrDot = ('0' <= x && x <= '9') || x == '.';
+      if (!digitOrDot) {
+        break;
+      }
+    }
+    return begin;
+  }
+  // Runs Viterbi() over [begin, end) and appends one WordRange to `res` for
+  // every position whose state value is odd (HMMModel::E or HMMModel::S);
+  // each range runs from the rune after the previous range up to that position.
+  void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
+    vector<size_t> status;
+    Viterbi(begin, end, status);
+
+    RuneStrArray::const_iterator wordStart = begin;
+    for (size_t pos = 0; pos < status.size(); pos++) {
+      if (status[pos] % 2 == 0) {
+        continue;
+      }
+      const RuneStrArray::const_iterator wordLast = begin + pos;
+      res.push_back(WordRange(wordStart, wordLast));
+      wordStart = wordLast + 1;
+    }
+  }
+
+  void Viterbi(RuneStrArray::const_iterator begin, // Finds the best-scoring state sequence for [begin, end) and writes one state per rune into status.
+        RuneStrArray::const_iterator end, 
+        vector<size_t>& status) const {
+    size_t Y = HMMModel::STATUS_SUM; // number of states
+    size_t X = end - begin; // number of runes; *begin is read below, so the range must not be empty
+
+    size_t XYSize = X * Y;
+    size_t now, old, stat;
+    double tmp, endE, endS;
+
+    vector<int> path(XYSize); // path[x + y*X]: predecessor state chosen for state y at position x
+    vector<double> weight(XYSize); // weight[x + y*X]: best score found for state y at position x
+
+    //start: position 0 is startProb plus the emit score, with no predecessor (-1)
+    for (size_t y = 0; y < Y; y++) {
+      weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
+      path[0 + y * X] = -1;
+    }
+
+    double emitProb;
+
+    for (size_t x = 1; x < X; x++) {
+      for (size_t y = 0; y < Y; y++) {
+        now = x + y*X;
+        weight[now] = MIN_DOUBLE;
+        path[now] = HMMModel::E; // warning: initial predecessor, kept when no candidate scores above MIN_DOUBLE
+        emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin+x)->rune, MIN_DOUBLE);
+        for (size_t preY = 0; preY < Y; preY++) {
+          old = x - 1 + preY * X;
+          tmp = weight[old] + model_->transProb[preY][y] + emitProb; // scores are summed, not multiplied
+          if (tmp > weight[now]) {
+            weight[now] = tmp;
+            path[now] = preY;
+          }
+        }
+      }
+    }
+
+    endE = weight[X-1+HMMModel::E*X]; // only E and S are considered as the state of the last rune
+    endS = weight[X-1+HMMModel::S*X];
+    stat = 0;
+    if (endE >= endS) {
+      stat = HMMModel::E;
+    } else {
+      stat = HMMModel::S;
+    }
+
+    status.resize(X);
+    for (int x = X -1 ; x >= 0; x--) { // backtrack from the last rune; x is signed so the loop can end below 0
+      status[x] = stat;
+      stat = path[x + stat*X]; // at x == 0 this reads the -1 marker, which is not used afterwards
+    }
+  }
+
+  const HMMModel* model_;
+  bool isNeedDestroy_;
+}; // class HMMSegment
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@ -0,0 +1,131 @@
+#ifndef CPPJIEAB_JIEBA_H
+#define CPPJIEAB_JIEBA_H
+
+#include "QuerySegment.hpp"
+#include "KeywordExtractor.hpp"
+
+namespace cppjieba {
+
+class Jieba {
+ public:
+  Jieba(const string& dict_path, // Builds one DictTrie and one HMMModel and passes their addresses to every segmenter and to the extractor.
+        const string& model_path,
+        const string& user_dict_path, 
+        const string& idfPath, 
+        const string& stopWordPath) 
+    : dict_trie_(dict_path, user_dict_path), // dict_trie_ and model_ are declared before the members that take their addresses
+      model_(model_path),
+      mp_seg_(&dict_trie_),
+      hmm_seg_(&model_),
+      mix_seg_(&dict_trie_, &model_),
+      full_seg_(&dict_trie_),
+      query_seg_(&dict_trie_, &model_),
+      extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
+
+  }
+  ~Jieba() {
+  }
+
+  struct LocWord {
+    string word;
+    size_t begin;
+    size_t end;
+  }; // struct LocWord
+
+  void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
+    mix_seg_.Cut(sentence, words, hmm);
+  }
+  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
+    mix_seg_.Cut(sentence, words, hmm);
+  }
+  void CutAll(const string& sentence, vector<string>& words) const {
+    full_seg_.Cut(sentence, words);
+  }
+  void CutAll(const string& sentence, vector<Word>& words) const {
+    full_seg_.Cut(sentence, words);
+  }
+  void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
+    query_seg_.Cut(sentence, words, hmm);
+  }
+  void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
+    query_seg_.Cut(sentence, words, hmm);
+  }
+  void CutHMM(const string& sentence, vector<string>& words) const {
+    hmm_seg_.Cut(sentence, words);
+  }
+  void CutHMM(const string& sentence, vector<Word>& words) const {
+    hmm_seg_.Cut(sentence, words);
+  }
+  void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
+    mp_seg_.Cut(sentence, words, max_word_len);
+  }
+  void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
+    mp_seg_.Cut(sentence, words, max_word_len);
+  }
+  
+  void Tag(const string& sentence, vector<pair<string, string> >& words) const {
+    mix_seg_.Tag(sentence, words);
+  }
+  string LookupTag(const string &str) const {
+    return mix_seg_.LookupTag(str);
+  }
+  bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
+    return dict_trie_.InsertUserWord(word, tag);
+  }
+
+  bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
+    return dict_trie_.InsertUserWord(word,freq, tag);
+  }
+
+  bool Find(const string& word)
+  {
+    return dict_trie_.Find(word);
+  }
+
+  void ResetSeparators(const string& s) { // Forwards the separator string s to each of the five segmenter members.
+    //TODO (the extractor member is not touched by this function)
+    mp_seg_.ResetSeparators(s);
+    hmm_seg_.ResetSeparators(s);
+    mix_seg_.ResetSeparators(s);
+    full_seg_.ResetSeparators(s);
+    query_seg_.ResetSeparators(s);
+  }
+
+  const DictTrie* GetDictTrie() const {
+    return &dict_trie_;
+  } 
+  
+  const HMMModel* GetHMMModel() const {
+    return &model_;
+  }
+
+  void LoadUserDict(const vector<string>& buf)  {
+    dict_trie_.LoadUserDict(buf);
+  }
+
+  void LoadUserDict(const set<string>& buf)  {
+    dict_trie_.LoadUserDict(buf);
+  }
+
+  void LoadUserDict(const string& path)  {
+    dict_trie_.LoadUserDict(path);
+  }
+
+ private:
+  DictTrie dict_trie_;
+  HMMModel model_;
+  
+  // They share the same dict trie and model
+  MPSegment mp_seg_;
+  HMMSegment hmm_seg_;
+  MixSegment mix_seg_;
+  FullSegment full_seg_;
+  QuerySegment query_seg_;
+
+ public:
+  KeywordExtractor extractor;
+}; // class Jieba
+
+} // namespace cppjieba
+
+#endif // CPPJIEAB_JIEBA_H
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@ -0,0 +1,153 @@
+#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
+#define CPPJIEBA_KEYWORD_EXTRACTOR_H
+
+#include <cmath>
+#include <set>
+#include "MixSegment.hpp"
+
+namespace cppjieba {
+
+using namespace limonp;
+using namespace std;
+
+/*utf8*/
+class KeywordExtractor {
+ public:
+  struct Word {
+    string word;
+    vector<size_t> offsets;
+    double weight;
+  }; // struct Word
+
+  KeywordExtractor(const string& dictPath, 
+        const string& hmmFilePath, 
+        const string& idfPath, 
+        const string& stopWordPath, 
+        const string& userDict = "") 
+    : segment_(dictPath, hmmFilePath, userDict) {
+    LoadIdfDict(idfPath);
+    LoadStopWordDict(stopWordPath);
+  }
+  KeywordExtractor(const DictTrie* dictTrie, 
+        const HMMModel* model,
+        const string& idfPath, 
+        const string& stopWordPath) 
+    : segment_(dictTrie, model) {
+    LoadIdfDict(idfPath);
+    LoadStopWordDict(stopWordPath);
+  }
+  ~KeywordExtractor() {
+  }
+
+  // Extracts up to topN keywords from `sentence` and appends only their text
+  // to `keywords` (existing elements are kept).
+  void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+    vector<Word> ranked;
+    Extract(sentence, ranked, topN);
+    for (vector<Word>::const_iterator it = ranked.begin(); it != ranked.end(); ++it) {
+      keywords.push_back(it->word);
+    }
+  }
+
+  // Extracts up to topN keywords from `sentence` and appends (text, weight)
+  // pairs to `keywords` (existing elements are kept).
+  void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+    vector<Word> ranked;
+    Extract(sentence, ranked, topN);
+    for (vector<Word>::const_iterator it = ranked.begin(); it != ranked.end(); ++it) {
+      const pair<string, double> entry(it->word, it->weight);
+      keywords.push_back(entry);
+    }
+  }
+
+  // Core extraction. Segments `sentence`, counts every word that is neither a
+  // single word (IsSingleWord) nor a stop word while recording the offset at
+  // which it starts, multiplies each count by the word's IDF (idfAverage_ when
+  // the word is unknown), and leaves the topN highest-weighted entries in
+  // `keywords`. If the segment lengths do not add up to sentence.size(), an
+  // error is logged and `keywords` is left as it was.
+  void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
+    vector<string> words;
+    segment_.Cut(sentence, words);
+
+    map<string, Word> wordmap;
+    size_t consumed = 0;
+    for (vector<string>::const_iterator w = words.begin(); w != words.end(); ++w) {
+      const size_t start = consumed;
+      consumed += w->size();
+      const bool ignored = IsSingleWord(*w) || stopWords_.find(*w) != stopWords_.end();
+      if (!ignored) {
+        Word& entry = wordmap[*w];
+        entry.offsets.push_back(start);
+        entry.weight += 1.0;
+      }
+    }
+    if (consumed != sentence.size()) {
+      XLOG(ERROR) << "words illegal";
+      return;
+    }
+
+    keywords.clear();
+    keywords.reserve(wordmap.size());
+    for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+      Word& entry = itr->second;
+      unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
+      const double idf = (cit != idfMap_.end()) ? cit->second : idfAverage_;
+      entry.weight *= idf;
+      entry.word = itr->first;
+      keywords.push_back(entry);
+    }
+    const size_t keep = min(topN, keywords.size());
+    partial_sort(keywords.begin(), keywords.begin() + keep, keywords.end(), Compare);
+    keywords.resize(keep);
+  }
+ private:
+  // Loads "word idf" pairs (space separated, one per line) into idfMap_ and
+  // sets idfAverage_, the value used for words missing from the map.
+  // Empty and malformed lines are logged and skipped. The average is taken
+  // over the lines that actually contributed a value, so skipped lines no
+  // longer drag it down.
+  void LoadIdfDict(const string& idfPath) {
+    ifstream ifs(idfPath.c_str());
+    XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
+    string line ;
+    vector<string> buf;
+    double idfSum = 0.0;
+    size_t entries = 0; // number of lines that contributed to idfSum
+    size_t lineno = 0;  // counts every line read; used for log messages only
+    for (; getline(ifs, line); lineno++) {
+      buf.clear();
+      if (line.empty()) {
+        XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
+        continue;
+      }
+      Split(line, buf, " ");
+      if (buf.size() != 2) {
+        XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " illegal. skipped.";
+        continue;
+      }
+      const double idf = atof(buf[1].c_str());
+      idfMap_[buf[0]] = idf;
+      idfSum += idf;
+      entries++;
+    }
+
+    assert(entries);
+    idfAverage_ = idfSum / entries;
+    assert(idfAverage_ > 0.0);
+  }
+  // Loads the stop-word list: every line of the file, taken verbatim, becomes
+  // one entry of stopWords_. The file must open (XCHECK) and the resulting set
+  // is asserted to be non-empty.
+  void LoadStopWordDict(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    XCHECK(ifs.is_open()) << "open " << filePath << " failed";
+    for (string stopWord; getline(ifs, stopWord); ) {
+      stopWords_.insert(stopWord);
+    }
+    assert(stopWords_.size());
+  }
+
+  static bool Compare(const Word& lhs, const Word& rhs) {
+    return lhs.weight > rhs.weight;
+  }
+
+  MixSegment segment_;
+  unordered_map<string, double> idfMap_;
+  double idfAverage_;
+
+  unordered_set<string> stopWords_;
+}; // class KeywordExtractor
+
+inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
+  return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}"; 
+}
+
+} // namespace cppjieba
+
+#endif
+
+
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@ -0,0 +1,137 @@
+#ifndef CPPJIEBA_MPSEGMENT_H
+#define CPPJIEBA_MPSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "DictTrie.hpp"
+#include "SegmentTagged.hpp"
+#include "PosTagger.hpp"
+
+namespace cppjieba {
+
+// Dictionary-driven segmenter: for each run of text it builds a DAG of
+// dictionary matches and keeps the path with the largest summed weight.
+// NOTE(review): no copy constructor/assignment is declared, so copying an
+// instance built from dictionary paths (isNeedDestroy_ == true) would make two
+// objects delete the same DictTrie -- confirm nothing copies this class.
+class MPSegment: public SegmentTagged {
+ public:
+  // Builds and owns a DictTrie loaded from the given dictionary files.
+  MPSegment(const string& dictPath, const string& userDictPath = "")
+    : dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
+  }
+  // Borrows an existing DictTrie; it is not deleted by this object.
+  MPSegment(const DictTrie* dictTrie)
+    : dictTrie_(dictTrie), isNeedDestroy_(false) {
+    assert(dictTrie_);
+  }
+  ~MPSegment() {
+    if (isNeedDestroy_) {
+      delete dictTrie_;
+    }
+  }
+
+  // Cuts `sentence` into word strings using MAX_WORD_LENGTH as the length cap.
+  void Cut(const string& sentence, vector<string>& words) const {
+    Cut(sentence, words, MAX_WORD_LENGTH);
+  }
+
+  // Same as above with an explicit cap on the word length passed to the trie lookup.
+  void Cut(const string& sentence,
+        vector<string>& words,
+        size_t max_word_len) const {
+    vector<Word> tmp;
+    Cut(sentence, tmp, max_word_len);
+    GetStringsFromWords(tmp, words);
+  }
+  // Cuts `sentence` into Word records. `words` is cleared before being filled.
+  void Cut(const string& sentence, 
+        vector<Word>& words, 
+        size_t max_word_len = MAX_WORD_LENGTH) const {
+    // PreFilter yields the sentence piece by piece, split at the runes in symbols_.
+    PreFilter pre_filter(symbols_, sentence);
+    PreFilter::Range range;
+    vector<WordRange> wrs;
+    wrs.reserve(sentence.size()/2);
+    while (pre_filter.HasNext()) {
+      range = pre_filter.Next();
+      Cut(range.begin, range.end, wrs, max_word_len);
+    }
+    words.clear();
+    words.reserve(wrs.size());
+    GetWordsFromWordRanges(sentence, wrs, words);
+  }
+  // Segments the rune range [begin, end) and appends the resulting ranges to
+  // `words`: dictionary lookup, weight computation, then path extraction.
+  void Cut(RuneStrArray::const_iterator begin,
+           RuneStrArray::const_iterator end,
+           vector<WordRange>& words,
+           size_t max_word_len = MAX_WORD_LENGTH) const {
+    vector<Dag> dags;
+    dictTrie_->Find(begin, 
+          end, 
+          dags,
+          max_word_len);
+    CalcDP(dags);
+    CutByDag(begin, end, dags, words);
+  }
+
+  const DictTrie* GetDictTrie() const {
+    return dictTrie_;
+  }
+
+  // Segments `src` and pairs each word with a tag via the PosTagger member.
+  bool Tag(const string& src, vector<pair<string, string> >& res) const {
+    return tagger_.Tag(src, res, *this);
+  }
+
+  bool IsUserDictSingleChineseWord(const Rune& value) const {
+    return dictTrie_->IsUserDictSingleChineseWord(value);
+  }
+ private:
+  // Fills pInfo and weight of every Dag entry. Entries are visited from last
+  // to first; each one keeps the candidate in `nexts` whose own weight plus
+  // the weight stored at dags[nextPos + 1] is largest.
+  void CalcDP(vector<Dag>& dags) const {
+    size_t nextPos;
+    const DictUnit* p;
+    double val;
+
+    for (vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
+      rit->pInfo = NULL;
+      rit->weight = MIN_DOUBLE;
+      assert(!rit->nexts.empty());
+      for (LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
+        nextPos = it->first;
+        p = it->second;
+        val = 0.0;
+        // Add the score of the remainder, if any entry follows this candidate.
+        if (nextPos + 1 < dags.size()) {
+          val += dags[nextPos + 1].weight;
+        }
+
+        // A candidate without a dictionary unit is scored with the trie's minimum weight.
+        if (p) {
+          val += p->weight;
+        } else {
+          val += dictTrie_->GetMinWeight();
+        }
+        if (val > rit->weight) {
+          rit->pInfo = p;
+          rit->weight = val;
+        }
+      }
+    }
+  }
+  // Walks the Dag entries front to back following the pInfo chosen by CalcDP
+  // and appends one inclusive [left, right] WordRange per step. `end` is not
+  // read here; the walk is bounded by dags.size().
+  void CutByDag(RuneStrArray::const_iterator begin, 
+        RuneStrArray::const_iterator end, 
+        const vector<Dag>& dags, 
+        vector<WordRange>& words) const {
+    size_t i = 0;
+    while (i < dags.size()) {
+      const DictUnit* p = dags[i].pInfo;
+      if (p) {
+        assert(p->word.size() >= 1);
+        // WordRange is inclusive, hence the -1 on the right bound.
+        WordRange wr(begin + i, begin + i + p->word.size() - 1);
+        words.push_back(wr);
+        i += p->word.size();
+      } else { //single chinese word
+        WordRange wr(begin + i, begin + i);
+        words.push_back(wr);
+        i++;
+      }
+    }
+  }
+
+  const DictTrie* dictTrie_;
+  bool isNeedDestroy_;  // true when dictTrie_ was allocated by this object
+  PosTagger tagger_;
+
+}; // class MPSegment
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@ -0,0 +1,109 @@
+#ifndef CPPJIEBA_MIXSEGMENT_H
+#define CPPJIEBA_MIXSEGMENT_H
+
+#include <cassert>
+#include "MPSegment.hpp"
+#include "HMMSegment.hpp"
+#include "limonp/StringUtil.hpp"
+#include "PosTagger.hpp"
+
+namespace cppjieba {
+// Segmenter that combines MPSegment with HMMSegment: the dictionary pass runs
+// first, and runs of single-rune results are re-cut by the HMM pass.
+class MixSegment: public SegmentTagged {
+ public:
+  // Builds both sub-segmenters from dictionary / model file paths.
+  MixSegment(const string& mpSegDict, const string& hmmSegDict, 
+        const string& userDict = "") 
+    : mpSeg_(mpSegDict, userDict), 
+      hmmSeg_(hmmSegDict) {
+  }
+  // Builds both sub-segmenters around an existing trie and HMM model.
+  MixSegment(const DictTrie* dictTrie, const HMMModel* model) 
+    : mpSeg_(dictTrie), hmmSeg_(model) {
+  }
+  ~MixSegment() {
+  }
+
+  // Cuts `sentence` into word strings with the HMM pass enabled.
+  void Cut(const string& sentence, vector<string>& words) const {
+    Cut(sentence, words, true);
+  }
+  // Cuts `sentence` into word strings; `hmm` toggles the HMM pass.
+  void Cut(const string& sentence, vector<string>& words, bool hmm) const {
+    vector<Word> tmp;
+    Cut(sentence, tmp, hmm);
+    GetStringsFromWords(tmp, words);
+  }
+  // Cuts `sentence` into Word records. `words` is cleared before being filled.
+  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
+    // PreFilter yields the sentence piece by piece, split at the runes in symbols_.
+    PreFilter pre_filter(symbols_, sentence);
+    PreFilter::Range range;
+    vector<WordRange> wrs;
+    wrs.reserve(sentence.size() / 2);
+    while (pre_filter.HasNext()) {
+      range = pre_filter.Next();
+      Cut(range.begin, range.end, wrs, hmm);
+    }
+    words.clear();
+    words.reserve(wrs.size());
+    GetWordsFromWordRanges(sentence, wrs, words);
+  }
+
+  // Segments the rune range [begin, end) and appends the ranges to `res`.
+  // Without `hmm` this is exactly the MPSegment result.
+  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
+    if (!hmm) {
+      mpSeg_.Cut(begin, end, res);
+      return;
+    }
+    vector<WordRange> words;
+    assert(end >= begin);
+    words.reserve(end - begin);
+    mpSeg_.Cut(begin, end, words);
+
+    vector<WordRange> hmmRes;
+    hmmRes.reserve(end - begin);
+    for (size_t i = 0; i < words.size(); i++) {
+      //if mp Get a word, it's ok, put it into result
+      if (words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
+        res.push_back(words[i]);
+        continue;
+      }
+
+      // if mp Get a single one and it is not in userdict, collect it in sequence
+      size_t j = i;
+      while (j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
+        j++;
+      }
+
+      // Cut the sequence with hmm
+      assert(j - 1 >= i);
+      // TODO
+      // The second argument is one past the last collected rune
+      // (presumably a half-open range for HMMSegment::Cut -- confirm).
+      hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
+      //put hmm result to result
+      for (size_t k = 0; k < hmmRes.size(); k++) {
+        res.push_back(hmmRes[k]);
+      }
+
+      //clear tmp vars
+      hmmRes.clear();
+
+      //let i jump over this piece
+      // (the loop's own i++ then resumes at index j)
+      i = j - 1;
+    }
+  }
+
+  const DictTrie* GetDictTrie() const {
+    return mpSeg_.GetDictTrie();
+  }
+
+  // Segments `src` and pairs each word with a tag via the PosTagger member.
+  bool Tag(const string& src, vector<pair<string, string> >& res) const {
+    return tagger_.Tag(src, res, *this);
+  }
+
+  // Returns the tag for a single word string.
+  string LookupTag(const string &str) const {
+    return tagger_.LookupTag(str, *this);
+  }
+
+ private:
+  MPSegment mpSeg_;
+  HMMSegment hmmSeg_;
+  PosTagger tagger_;
+
+}; // class MixSegment
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/PosTagger.hpp
+++ b/libchinese-segmentation/cppjieba/PosTagger.hpp
@ -0,0 +1,77 @@
+#ifndef CPPJIEBA_POS_TAGGING_H
+#define CPPJIEBA_POS_TAGGING_H
+
+#include "limonp/StringUtil.hpp"
+#include "SegmentTagged.hpp"
+#include "DictTrie.hpp"
+
+namespace cppjieba {
+using namespace limonp;
+
+// Fallback tags returned by PosTagger::SpecialRule when the dictionary
+// supplies no tag for a word.
+static const char* const POS_M = "m";     // every counted ASCII rune is a digit
+static const char* const POS_ENG = "eng"; // counted ASCII runes include non-digits
+static const char* const POS_X = "x";     // no ASCII rune counted, or decoding failed
+
+// Assigns a part-of-speech tag to each word produced by a SegmentTagged
+// segmenter, using the tag stored in the dictionary when there is one.
+class PosTagger {
+ public:
+  PosTagger() {
+  }
+  ~PosTagger() {
+  }
+
+  // Cuts `src` with `segment` and appends one (word, tag) pair per word to
+  // `res`. `res` is not cleared first; returns whether `res` is non-empty.
+  bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
+    vector<string> pieces;
+    segment.Cut(src, pieces);
+
+    for (size_t k = 0; k < pieces.size(); ++k) {
+      const string& piece = pieces[k];
+      res.push_back(pair<string, string>(piece, LookupTag(piece, segment)));
+    }
+    return res.size() > 0;
+  }
+
+  // Returns the dictionary tag of `str`, or the SpecialRule() fallback when the
+  // word is unknown or has an empty tag. Undecodable input yields POS_X.
+  string LookupTag(const string &str, const SegmentTagged& segment) const {
+    const DictTrie* dict = segment.GetDictTrie();
+    assert(dict != NULL);
+
+    RuneStrArray runes;
+    if (!DecodeRunesInString(str, runes)) {
+      XLOG(ERROR) << "Decode failed.";
+      return POS_X;
+    }
+
+    const DictUnit* unit = dict->Find(runes.begin(), runes.end());
+    if (unit != NULL && !unit->tag.empty()) {
+      return unit->tag;
+    }
+    return SpecialRule(runes);
+  }
+
+ private:
+  // Classifies a word by its ASCII content. Counting stops once the number of
+  // ASCII runes reaches half the word length, so a one-rune word is never
+  // counted (size / 2 == 0) and always yields POS_X.
+  const char* SpecialRule(const RuneStrArray& unicode) const {
+    const size_t total = unicode.size();
+    const size_t asciiCap = total / 2;
+    size_t digits = 0;
+    size_t ascii = 0;
+    for (size_t i = 0; i < total && ascii < asciiCap; ++i) {
+      if (unicode[i].rune >= 0x80) {
+        continue;
+      }
+      ++ascii;
+      if (unicode[i].rune >= '0' && unicode[i].rune <= '9') {
+        ++digits;
+      }
+    }
+    // ascii char is not found
+    if (ascii == 0) {
+      return POS_X;
+    }
+    // all counted ascii runes are digits -> number, otherwise english
+    return digits == ascii ? POS_M : POS_ENG;
+  }
+
+}; // class PosTagger
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@ -0,0 +1,54 @@
+#ifndef CPPJIEBA_PRE_FILTER_H
+#define CPPJIEBA_PRE_FILTER_H
+
+#include "Trie.hpp"
+#include "limonp/Logging.hpp"
+
+namespace cppjieba {
+
+// Splits a sentence into consecutive rune ranges at separator runes. Each
+// separator is returned as a range of its own, and the text between
+// separators as one range.
+class PreFilter {
+ public:
+  //TODO use WordRange instead of Range
+  // Half-open range [begin, end) into the decoded sentence.
+  struct Range {
+    RuneStrArray::const_iterator begin;
+    RuneStrArray::const_iterator end;
+  }; // struct Range
+
+  // Decodes `sentence` into runes. A decode failure is logged only; `symbols`
+  // is kept by reference.
+  PreFilter(const unordered_set<Rune>& symbols, 
+        const string& sentence)
+    : symbols_(symbols) {
+    const bool decoded = DecodeRunesInString(sentence, sentence_);
+    if (!decoded) {
+      XLOG(ERROR) << "decode failed. "; 
+    }
+    cursor_ = sentence_.begin();
+  }
+  ~PreFilter() {
+  }
+  // True while runes remain to be returned.
+  bool HasNext() const {
+    return !(cursor_ == sentence_.end());
+  }
+  // Returns the next range and advances the cursor past it.
+  Range Next() {
+    Range piece;
+    piece.begin = cursor_;
+    for (; cursor_ != sentence_.end(); ++cursor_) {
+      if (!IsIn(symbols_, cursor_->rune)) {
+        continue;
+      }
+      // A separator sitting at the start is returned alone; otherwise the
+      // range stops right before it and the separator is left for next time.
+      if (piece.begin == cursor_) {
+        ++cursor_;
+      }
+      break;
+    }
+    piece.end = cursor_;
+    return piece;
+  }
+ private:
+  RuneStrArray::const_iterator cursor_;
+  RuneStrArray sentence_;
+  const unordered_set<Rune>& symbols_;
+}; // class PreFilter
+
+} // namespace cppjieba
+
+#endif // CPPJIEBA_PRE_FILTER_H
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@ -0,0 +1,89 @@
+#ifndef CPPJIEBA_QUERYSEGMENT_H
+#define CPPJIEBA_QUERYSEGMENT_H
+
+#include <algorithm>
+#include <set>
+#include <cassert>
+#include "limonp/Logging.hpp"
+#include "DictTrie.hpp"
+#include "SegmentBase.hpp"
+#include "FullSegment.hpp"
+#include "MixSegment.hpp"
+#include "Unicode.hpp"
+
+namespace cppjieba {
+// Query-oriented segmenter. Every word produced by the MixSegment pass is
+// emitted, preceded by those of its 2-rune and 3-rune sub-words that the
+// dictionary trie contains.
+class QuerySegment: public SegmentBase {
+ public:
+  QuerySegment(const string& dict, const string& model, const string& userDict = "")
+    : mixSeg_(dict, model, userDict),
+      trie_(mixSeg_.GetDictTrie()) {
+  }
+  QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
+    : mixSeg_(dictTrie, model), trie_(dictTrie) {
+  }
+  ~QuerySegment() {
+  }
+
+  // Cuts `sentence` into word strings with the HMM pass enabled.
+  void Cut(const string& sentence, vector<string>& words) const {
+    Cut(sentence, words, true);
+  }
+  // Cuts `sentence` into word strings; `hmm` toggles the HMM pass.
+  void Cut(const string& sentence, vector<string>& words, bool hmm) const {
+    vector<Word> detailed;
+    Cut(sentence, detailed, hmm);
+    GetStringsFromWords(detailed, words);
+  }
+  // Cuts `sentence` into Word records. `words` is cleared before being filled.
+  void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
+    PreFilter filter(symbols_, sentence);
+    vector<WordRange> ranges;
+    ranges.reserve(sentence.size()/2);
+    while (filter.HasNext()) {
+      const PreFilter::Range piece = filter.Next();
+      Cut(piece.begin, piece.end, ranges, hmm);
+    }
+    words.clear();
+    words.reserve(ranges.size());
+    GetWordsFromWordRanges(sentence, ranges, words);
+  }
+  // Segments the rune range [begin, end) and appends the ranges to `res`.
+  void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
+    //use mix Cut first
+    vector<WordRange> mixRes;
+    mixSeg_.Cut(begin, end, mixRes, hmm);
+
+    for (size_t k = 0; k < mixRes.size(); ++k) {
+      const WordRange& whole = mixRes[k];
+      AppendSubWords(whole, 2, res);
+      AppendSubWords(whole, 3, res);
+      res.push_back(whole);
+    }
+  }
+ private:
+  // Appends every `gram`-rune window of `whole` that is a dictionary word.
+  // Words that are not longer than `gram` contribute nothing.
+  void AppendSubWords(const WordRange& whole, size_t gram, vector<WordRange>& res) const {
+    if (whole.Length() <= gram) {
+      return;
+    }
+    const size_t reach = gram - 1;  // distance from a window's left to its right rune
+    for (size_t i = 0; i + reach < whole.Length(); i++) {
+      WordRange wr(whole.left + i, whole.left + i + reach);
+      if (trie_->Find(wr.left, wr.right + 1) != NULL) {
+        res.push_back(wr);
+      }
+    }
+  }
+  bool IsAllAscii(const Unicode& s) const {
+    size_t i = 0;
+    while (i < s.size()) {
+      if (s[i] >= 0x80) {
+        return false;
+      }
+      ++i;
+    }
+    return true;
+  }
+  MixSegment mixSeg_;
+  const DictTrie* trie_;
+}; // QuerySegment
+
+} // namespace cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/SegmentBase.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp
@ -0,0 +1,46 @@
+#ifndef CPPJIEBA_SEGMENTBASE_H
+#define CPPJIEBA_SEGMENTBASE_H
+
+#include "limonp/Logging.hpp"
+#include "PreFilter.hpp"
+#include <cassert>
+
+
+namespace cppjieba {
+
+// Default separator runes installed by SegmentBase's constructor: space, tab,
+// newline, then the UTF-8 bytes of U+FF0C (fullwidth comma) and U+3002
+// (ideographic full stop).
+const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
+
+using namespace limonp;
+
+// Common base of the segmenters: declares the basic Cut() interface and holds
+// the set of separator runes.
+class SegmentBase {
+ public:
+  // Installs the default separators; XCHECK fires if that fails.
+  SegmentBase() {
+    XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
+  }
+  virtual ~SegmentBase() {
+  }
+
+  virtual void Cut(const string& sentence, vector<string>& words) const = 0;
+
+  // Replaces the separator set with the runes of `s`. Returns false when `s`
+  // cannot be decoded or contains the same rune twice; the set has already
+  // been cleared by then and may hold only part of `s`.
+  bool ResetSeparators(const string& s) {
+    symbols_.clear();
+    RuneStrArray decoded;
+    const bool ok = DecodeRunesInString(s, decoded);
+    if (!ok) {
+      XLOG(ERROR) << "decode " << s << " failed";
+      return false;
+    }
+    for (RuneStrArray::const_iterator it = decoded.begin(); it != decoded.end(); ++it) {
+      const bool inserted = symbols_.insert(it->rune).second;
+      if (inserted) {
+        continue;
+      }
+      XLOG(ERROR) << s.substr(it->offset, it->len) << " already exists";
+      return false;
+    }
+    return true;
+  }
+ protected:
+  unordered_set<Rune> symbols_;
+}; // class SegmentBase
+
+} // cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/SegmentTagged.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentTagged.hpp
@ -0,0 +1,23 @@
+#ifndef CPPJIEBA_SEGMENTTAGGED_H
+#define CPPJIEBA_SEGMENTTAGGED_H
+
+#include "SegmentBase.hpp"
+
+namespace cppjieba {
+
+// Interface of segmenters that can also tag words; PosTagger works against it.
+class SegmentTagged : public SegmentBase{
+ public:
+  SegmentTagged() {
+  }
+  virtual ~SegmentTagged() {
+  }
+
+  // Segments `src` and appends (word, tag) pairs to `res`.
+  virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
+
+  // Returns the dictionary used to look tags up.
+  // NOTE(review): DictTrie is not declared by this header's own include, so it
+  // presumably relies on the includer pulling in DictTrie.hpp first -- confirm.
+  virtual const DictTrie* GetDictTrie() const = 0;
+
+}; // class SegmentTagged
+
+} // cppjieba
+
+#endif
--- a/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/TextRankExtractor.hpp
@ -0,0 +1,190 @@
+#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
+#define CPPJIEBA_TEXTRANK_EXTRACTOR_H
+
+#include <cmath>
+#include "Jieba.hpp"
+
+namespace cppjieba {
+  using namespace limonp;
+  using namespace std;
+
+  // Keyword extractor using a TextRank-style ranking: words that co-occur
+  // within a sliding window are linked in an undirected weighted graph, node
+  // scores are iterated a fixed number of times, and the highest scoring words
+  // are returned.
+  class TextRankExtractor {
+  public:
+    // A keyword: its text, the byte offsets of its occurrences and its score.
+    typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
+  private:
+    typedef std::map<string,Word> WordMap;
+
+    // Undirected, weighted co-occurrence graph over words.
+    class WordGraph{
+    private:
+      typedef double Score;
+      typedef string Node;
+      typedef std::set<Node> NodeSet;
+
+      typedef std::map<Node,double> Edges;
+      typedef std::map<Node,Edges> Graph;
+      //typedef std::unordered_map<Node,double> Edges;
+      //typedef std::unordered_map<Node,Edges> Graph;
+
+      double d;  // damping factor of the score update in rank()
+      Graph graph;
+      NodeSet nodeSet;
+    public:
+      WordGraph(): d(0.85) {}
+      WordGraph(double in_d): d(in_d) {}
+
+      // Adds `weight` to the edge between `start` and `end`, in both directions.
+      void addEdge(const Node& start,const Node& end,double weight){
+        nodeSet.insert(start);
+        nodeSet.insert(end);
+        graph[start][end]+=weight;
+        graph[end][start]+=weight;
+      }
+
+      // Scores every node of the graph into `ws` using `rankTime` update
+      // rounds, then rescales all weights in `ws` against the smallest and
+      // largest score. Does nothing when the graph has no node.
+      void rank(WordMap &ws,size_t rankTime=10){
+        WordMap outSum;
+        Score wsdef, min_rank, max_rank;
+
+        if( graph.size() == 0)
+          return;
+
+        wsdef = 1.0 / graph.size();
+
+        for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
+          // edges->first: start node; edge->first: end node; edge->second: weight
+          ws[edges->first].word=edges->first;
+          ws[edges->first].weight=wsdef;
+          outSum[edges->first].weight=0;
+          for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
+            outSum[edges->first].weight+=edge->second;
+          }
+        }
+        //sort(nodeSet.begin(),nodeSet.end()); is sorting needed?
+        for( size_t i=0; i<rankTime; i++ ){
+          for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
+            double s = 0;
+            for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
+              // edge->first: end node; edge->second: weight
+              s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
+            ws[*node].weight = (1 - d) + d * s;
+          }
+        }
+
+        min_rank=max_rank=ws.begin()->second.weight;
+        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+          if( i->second.weight < min_rank ){
+            min_rank = i->second.weight;
+          }
+          if( i->second.weight > max_rank ){
+            max_rank = i->second.weight;
+          }
+        }
+        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+          ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
+        }
+      }
+    };
+
+  public:
+    // Builds the segmenter from dictionary files and loads the stop words.
+    TextRankExtractor(const string& dictPath,
+          const string& hmmFilePath,
+          const string& stopWordPath,
+          const string& userDict = "")
+      : segment_(dictPath, hmmFilePath, userDict) {
+      LoadStopWordDict(stopWordPath);
+    }
+    // Builds the segmenter around an existing trie and HMM model.
+    TextRankExtractor(const DictTrie* dictTrie,
+          const HMMModel* model,
+          const string& stopWordPath)
+      : segment_(dictTrie, model) {
+      LoadStopWordDict(stopWordPath);
+    }
+    // Shares the trie and HMM model of an existing Jieba instance.
+    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
+      LoadStopWordDict(stopWordPath);
+    }
+    ~TextRankExtractor() {
+    }
+
+    // Appends the text of the top `topN` keywords of `sentence` to `keywords`.
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(topWords[i].word);
+      }
+    }
+
+    // Appends (text, weight) pairs of the top `topN` keywords to `keywords`.
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+      }
+    }
+
+    // Replaces `keywords` with the `topN` best keywords of `sentence`, sorted
+    // by descending weight. Single-rune words and stop words are ignored.
+    // `span` bounds the co-occurrence window and `rankTime` is the number of
+    // ranking rounds. If the segmented words do not add up to the sentence
+    // length an error is logged and `keywords` is left as it was.
+    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
+      vector<string> words;
+      segment_.Cut(sentence, words);
+
+      TextRankExtractor::WordGraph graph;
+      WordMap wordmap;
+      size_t offset = 0;
+
+      for(size_t i=0; i < words.size(); i++){
+        size_t t = offset;  // byte offset at which words[i] starts
+        offset += words[i].size();
+        if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
+          continue;
+        }
+        // Link words[i] with the following candidates; skipped words widen the
+        // window so that they do not use up the span.
+        for(size_t j=i+1,skip=0;j<i+span+skip && j<words.size();j++){
+          if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
+            skip++;
+            continue;
+          }
+          graph.addEdge(words[i],words[j],1);
+        }
+        // Record the text here as well: rank() only fills it for graph nodes,
+        // so a candidate without any edge would otherwise be returned with an
+        // empty `word`.
+        Word& entry = wordmap[words[i]];
+        entry.word = words[i];
+        entry.offsets.push_back(t);
+      }
+      if (offset != sentence.size()) {
+        XLOG(ERROR) << "words illegal";
+        return;
+      }
+
+      graph.rank(wordmap,rankTime);
+
+      keywords.clear();
+      keywords.reserve(wordmap.size());
+      for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+        keywords.push_back(itr->second);
+      }
+
+      topN = min(topN, keywords.size());
+      partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+      keywords.resize(topN);
+    }
+  private:
+    // Reads the stop-word list at `filePath`, one entry per line.
+    void LoadStopWordDict(const string& filePath) {
+      ifstream ifs(filePath.c_str());
+      XCHECK(ifs.is_open()) << "open " << filePath << " failed";
+      string line ;
+      while (getline(ifs, line)) {
+        stopWords_.insert(line);
+      }
+      assert(stopWords_.size());
+    }
+
+    // Ordering predicate for partial_sort: the heavier keyword sorts first.
+    static bool Compare(const Word &x,const Word &y){
+      return x.weight > y.weight;
+    }
+
+    MixSegment segment_;
+    unordered_set<string> stopWords_;
+  }; // class TextRankExtractor
+  
+  // Streams a keyword as a JSON-like object holding its word, offsets and weight.
+  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
+    os << "{\"word\": \"" << word.word << "\", \"offset\": ";
+    os << word.offsets;
+    os << ", \"weight\": " << word.weight << "}";
+    return os;
+  }
+} // namespace cppjieba
+
+#endif
+
+
--- a/libchinese-segmentation/cppjieba/Trie.hpp
+++ b/libchinese-segmentation/cppjieba/Trie.hpp
@ -0,0 +1,174 @@
+#ifndef CPPJIEBA_TRIE_HPP
+#define CPPJIEBA_TRIE_HPP
+
+#include <vector>
+#include <queue>
+#include "limonp/StdExtension.hpp"
+#include "Unicode.hpp"
+
+namespace cppjieba {
+
+using namespace std;
+
+// Default upper bound, in runes, on the length of a candidate word examined
+// by Trie::Find when it builds the DAG.
+const size_t MAX_WORD_LENGTH = 512;
+
+// One dictionary entry as stored in the trie.
+struct DictUnit {
+  Unicode word;   // the entry's runes
+  double weight;  // score added for this entry when segmentation paths are compared
+  string tag;     // part-of-speech tag; may be empty
+}; // struct DictUnit
+
+// for debugging
+// inline ostream & operator << (ostream& os, const DictUnit& unit) {
+//   string s;
+//   s << unit.word;
+//   return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
+// }
+
+// Per-position record of the segmentation DAG built by Trie::Find.
+struct Dag {
+  RuneStr runestr;  // the rune at this position
+  // [offset, nexts.first]
+  // Candidate words starting here: (index of the last rune, dictionary entry
+  // or NULL when the single rune is not a dictionary word).
+  limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
+  const DictUnit * pInfo;  // candidate chosen for this position by the segmenter
+  double weight;           // score of the chosen candidate
+  size_t nextPos; // TODO
+  Dag():runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
+  }
+}; // struct Dag
+
+typedef Rune TrieKey;
+
+// Node of the rune trie. Both members start out NULL.
+class TrieNode {
+ public :
+  TrieNode(): next(NULL), ptValue(NULL) {
+  }
+ public:
+  typedef unordered_map<TrieKey, TrieNode*> NextMap;
+  NextMap *next;            // children keyed by rune; allocated on first insertion
+  const DictUnit *ptValue;  // entry ending at this node, or NULL if none
+};
+
+// Rune-keyed trie mapping words to DictUnit pointers. The trie owns its nodes
+// and child maps but not the DictUnit objects it points to.
+// NOTE(review): no copy constructor/assignment is declared, so a copied Trie
+// would delete the same nodes twice -- confirm nothing copies this class.
+class Trie {
+ public:
+  // Builds the trie from parallel arrays of keys and value pointers.
+  Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
+   : root_(new TrieNode) {
+    CreateTrie(keys, valuePointers);
+  }
+  ~Trie() {
+    DeleteNode(root_);
+  }
+
+  // Exact lookup of the rune range [begin, end). Returns NULL for an empty
+  // range, for an unknown path, and for a path that is only a prefix of
+  // stored words (the node reached carries no value).
+  const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
+    if (begin == end) {
+      return NULL;
+    }
+
+    const TrieNode* ptNode = root_;
+    TrieNode::NextMap::const_iterator citer;
+    for (RuneStrArray::const_iterator it = begin; it != end; it++) {
+      if (NULL == ptNode->next) {
+        return NULL;
+      }
+      citer = ptNode->next->find(it->rune);
+      if (ptNode->next->end() == citer) {
+        return NULL;
+      }
+      ptNode = citer->second;
+    }
+    return ptNode->ptValue;
+  }
+
+  // Builds the DAG for [begin, end): `res` gets one Dag per rune, and for each
+  // start position i its `nexts` lists every stored word beginning at i that
+  // is at most `max_word_len` runes long.
+  void Find(RuneStrArray::const_iterator begin, 
+        RuneStrArray::const_iterator end, 
+        vector<struct Dag>&res, 
+        size_t max_word_len = MAX_WORD_LENGTH) const {
+    assert(root_ != NULL);
+    res.resize(end - begin);
+
+    const TrieNode *ptNode = NULL;
+    TrieNode::NextMap::const_iterator citer;
+    for (size_t i = 0; i < size_t(end - begin); i++) {
+      res[i].runestr = *(begin + i);
+
+      if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
+        ptNode = citer->second;
+      } else {
+        ptNode = NULL;
+      }
+      // The single rune is always a candidate, with a NULL entry when it is
+      // not a stored word, so `nexts` is never left empty.
+      if (ptNode != NULL) {
+        res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));
+      } else {
+        res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, static_cast<const DictUnit*>(NULL)));
+      }
+
+      // Extend the match rune by rune; j is the index of the candidate's last rune.
+      for (size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) {
+        if (ptNode == NULL || ptNode->next == NULL) {
+          break;
+        }
+        citer = ptNode->next->find((begin + j)->rune);
+        if (ptNode->next->end() == citer) {
+          break;
+        }
+        ptNode = citer->second;
+        if (NULL != ptNode->ptValue) {
+          res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
+        }
+      }
+    }
+  }
+
+  // Stores `ptValue` under `key`, creating nodes along the path as needed. An
+  // empty key is ignored; a value already stored under `key` is replaced.
+  void InsertNode(const Unicode& key, const DictUnit* ptValue) {
+    if (key.begin() == key.end()) {
+      return;
+    }
+
+    TrieNode::NextMap::const_iterator kmIter;
+    TrieNode *ptNode = root_;
+    for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
+      if (NULL == ptNode->next) {
+        ptNode->next = new TrieNode::NextMap;
+      }
+      kmIter = ptNode->next->find(*citer);
+      if (ptNode->next->end() == kmIter) {
+        TrieNode *nextNode = new TrieNode;
+
+        ptNode->next->insert(make_pair(*citer, nextNode));
+        ptNode = nextNode;
+      } else {
+        ptNode = kmIter->second;
+      }
+    }
+    assert(ptNode != NULL);
+    ptNode->ptValue = ptValue;
+  }
+
+ private:
+  // Inserts keys[i] -> valuePointers[i] for every i; does nothing when either
+  // array is empty.
+  void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
+    if (valuePointers.empty() || keys.empty()) {
+      return;
+    }
+    assert(keys.size() == valuePointers.size());
+
+    for (size_t i = 0; i < keys.size(); i++) {
+      InsertNode(keys[i], valuePointers[i]);
+    }
+  }
+
+  // Recursively frees `node`, its child map and all descendants.
+  void DeleteNode(TrieNode* node) {
+    if (NULL == node) {
+      return;
+    }
+    if (NULL != node->next) {
+      for (TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
+        DeleteNode(it->second);
+      }
+      delete node->next;
+    }
+    delete node;
+  }
+
+  TrieNode* root_;
+}; // class Trie
+} // namespace cppjieba
+
+#endif // CPPJIEBA_TRIE_HPP
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@ -0,0 +1,227 @@
+#ifndef CPPJIEBA_UNICODE_H
+#define CPPJIEBA_UNICODE_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+#include <ostream>
+#include "limonp/LocalVector.hpp"
+
+namespace cppjieba {
+
+using std::string;
+using std::vector;
+
+typedef uint32_t Rune;
+
+// A segmented word together with its position in the source sentence.
+struct Word {
+  string word;              // UTF-8 text of the word
+  uint32_t offset;          // byte offset of the word within the sentence
+  uint32_t unicode_offset;  // offset of the word counted in runes
+  uint32_t unicode_length;  // length of the word counted in runes
+  // Rune-based fields are unknown here; they are zeroed so that they never
+  // hold indeterminate values.
+  Word(const string& w, uint32_t o)
+   : word(w), offset(o), unicode_offset(0), unicode_length(0) {
+  }
+  Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
+          : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+  }
+}; // struct Word
+
+// Streams a Word as a JSON-like object holding its text and byte offset.
+inline std::ostream& operator << (std::ostream& os, const Word& w) {
+  os << "{\"word\": \"" << w.word << "\", \"offset\": ";
+  os << w.offset << "}";
+  return os;
+}
+
+// One decoded rune plus where it came from in the UTF-8 source string.
+struct RuneStr {
+  Rune rune;                // decoded code point value
+  uint32_t offset;          // byte offset of the rune in the source string
+  uint32_t len;             // number of bytes the rune occupies
+  uint32_t unicode_offset;  // index of the rune counted in runes
+  uint32_t unicode_length;  // length in runes; DecodeRunesInString stores 1
+  RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
+  }
+  // Leaves the rune-based fields at 0.
+  RuneStr(Rune r, uint32_t o, uint32_t l)
+    : rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
+  }
+  RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
+          : rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+  }
+}; // struct RuneStr
+
+// Streams a RuneStr as a JSON-like object holding its rune, byte offset and byte length.
+inline std::ostream& operator << (std::ostream& os, const RuneStr& r) {
+  os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset;
+  os << ", \"len\": " << r.len << "}";
+  return os;
+}
+
+typedef limonp::LocalVector<Rune> Unicode;
+typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
+
+// Inclusive range [left, right] of runes forming one word.
+struct WordRange {
+  RuneStrArray::const_iterator left;
+  RuneStrArray::const_iterator right;
+  WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
+   : left(l), right(r) {
+  }
+  // Number of runes in the range; both ends are counted.
+  size_t Length() const {
+    return (right - left) + 1;
+  }
+  // True when every rune of the range is below 0x80.
+  bool IsAllAscii() const {
+    RuneStrArray::const_iterator cur = left;
+    while (cur <= right) {
+      if (!(cur->rune < 0x80)) {
+        return false;
+      }
+      ++cur;
+    }
+    return true;
+  }
+}; // struct WordRange
+
+// Result of decoding a single rune: its value and how many bytes it used.
+// DecodeRuneInString reports a failed decode as len == 0.
+struct RuneStrLite {
+  uint32_t rune;  // decoded code point value
+  uint32_t len;   // bytes consumed from the input
+  RuneStrLite(): rune(0), len(0) {
+  }
+  RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) {
+  }
+}; // struct RuneStrLite
+
+// Decodes the first UTF-8 sequence of `str` (at most `len` bytes available).
+// Returns the rune and its byte length, or {0, 0} when `str` is NULL, `len`
+// is 0, the lead byte is above 0xf7, or too few bytes remain for the sequence.
+// Only the lead byte selects the sequence length: continuation bytes are not
+// validated, and lead bytes 0x80..0xbf are handled as 2-byte leads.
+inline RuneStrLite DecodeRuneInString(const char* str, size_t len) {
+  RuneStrLite decoded(0, 0);
+  if (str == NULL || len == 0) {
+    return decoded;
+  }
+
+  const uint8_t lead = (uint8_t)(str[0]);
+  uint32_t width = 0;    // total bytes in the sequence
+  uint32_t payload = 0;  // bits gathered so far
+  if (lead < 0x80) {                      // 0xxxxxxx: 7 payload bits
+    width = 1;
+    payload = lead & 0x7f;
+  } else if (lead <= 0xdf && 1 < len) {   // 110xxxxx: 5 + 6 = 11 bits
+    width = 2;
+    payload = lead & 0x1f;
+  } else if (lead <= 0xef && 2 < len) {   // 1110xxxx: 4 + 6 + 6 = 16 bits
+    width = 3;
+    payload = lead & 0x0f;
+  } else if (lead <= 0xf7 && 3 < len) {   // 11110xxx: 3 + 6 + 6 + 6 = 21 bits
+    width = 4;
+    payload = lead & 0x07;
+  } else {
+    return decoded;
+  }
+
+  // Each following byte contributes its low 6 bits.
+  for (uint32_t k = 1; k < width; ++k) {
+    payload <<= 6;
+    payload |= (uint8_t)(str[k]) & 0x3f;
+  }
+  decoded.rune = payload;
+  decoded.len = width;
+  return decoded;
+}
+
+// Decodes `len` bytes of UTF-8 at `s` into `runes`, recording for every rune
+// its byte offset, byte length and rune index. On the first rune that cannot
+// be decoded `runes` is emptied and false is returned.
+inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
+  runes.clear();
+  runes.reserve(len / 2);
+  uint32_t byteOff = 0;  // position in bytes
+  uint32_t runeIdx = 0;  // position in runes
+  while (byteOff < len) {
+    const RuneStrLite piece = DecodeRuneInString(s + byteOff, len - byteOff);
+    if (piece.len == 0) {
+      runes.clear();
+      return false;
+    }
+    runes.push_back(RuneStr(piece.rune, byteOff, piece.len, runeIdx, 1));
+    byteOff += piece.len;
+    ++runeIdx;
+  }
+  return true;
+}
+
+// std::string convenience overload of the pointer/length decoder.
+inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
+  const bool ok = DecodeRunesInString(s.c_str(), s.size(), runes);
+  return ok;
+}
+
+// Decodes `len` bytes of UTF-8 at `s` into bare rune values. `unicode` is
+// emptied first and stays empty when decoding fails.
+inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
+  unicode.clear();
+  RuneStrArray detailed;
+  const bool ok = DecodeRunesInString(s, len, detailed);
+  if (!ok) {
+    return false;
+  }
+  unicode.reserve(detailed.size());
+  for (RuneStrArray::const_iterator it = detailed.begin(); it != detailed.end(); ++it) {
+    unicode.push_back(it->rune);
+  }
+  return true;
+}
+
+// True when the first decoded rune spans the whole of `str`, i.e. `str` is a
+// single rune. An empty string also yields true (0 == 0).
+inline bool IsSingleWord(const string& str) {
+  const size_t total = str.size();
+  return DecodeRuneInString(str.c_str(), total).len == total;
+}
+
+// std::string convenience overload of the pointer/length decoder.
+inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
+  const bool ok = DecodeRunesInString(s.c_str(), s.size(), unicode);
+  return ok;
+}
+
+// Returns the runes of `s`. The decoder's status is not reported; a string
+// that fails to decode gives an empty result.
+inline Unicode DecodeRunesInString(const string& s) {
+  Unicode runes;
+  (void)DecodeRunesInString(s, runes);
+  return runes;
+}
+
+
+// [left, right]
+// Builds the Word covering the inclusive rune range [left, right] of `s`.
+inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
+  assert(right->offset >= left->offset);
+  // Spans run from the start of `left` to the end of `right`.
+  const uint32_t byteSpan = right->offset - left->offset + right->len;
+  const uint32_t runeSpan = right->unicode_offset - left->unicode_offset + right->unicode_length;
+  Word result(s.substr(left->offset, byteSpan), left->offset, left->unicode_offset, runeSpan);
+  return result;
+}
+
+// Returns the bytes of `s` covered by the inclusive rune range [left, right].
+inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
+  assert(right->offset >= left->offset);
+  const uint32_t firstByte = left->offset;
+  const uint32_t byteSpan = right->offset - firstByte + right->len;
+  return s.substr(firstByte, byteSpan);
+}
+
+// Appends one Word per range in `wrs` to `words`; `words` is not cleared first.
+inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
+  for (vector<WordRange>::const_iterator wr = wrs.begin(); wr != wrs.end(); ++wr) {
+    words.push_back(GetWordFromRunes(s, wr->left, wr->right));
+  }
+}
+
+// Value-returning convenience form of the overload above.
+inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
+  vector<Word> collected;
+  GetWordsFromWordRanges(s, wrs, collected);
+  return collected;
+}
+
+// Overwrites `strs` with the text of each Word, in the same order.
+inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
+  strs.resize(words.size());
+  vector<string>::iterator out = strs.begin();
+  for (vector<Word>::const_iterator in = words.begin(); in != words.end(); ++in, ++out) {
+    *out = in->word;
+  }
+}
+
+} // namespace cppjieba
+
+#endif // CPPJIEBA_UNICODE_H
--- a/libchinese-segmentation/cppjieba/cppjieba.pri
+++ b/libchinese-segmentation/cppjieba/cppjieba.pri
@ -0,0 +1,21 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/DictTrie.hpp \
+    $$PWD/FullSegment.hpp \
+    $$PWD/HMMModel.hpp \
+    $$PWD/HMMSegment.hpp \
+    $$PWD/Jieba.hpp \
+    $$PWD/KeywordExtractor.hpp \
+    $$PWD/MPSegment.hpp \
+    $$PWD/MixSegment.hpp \
+    $$PWD/PosTagger.hpp \
+    $$PWD/PreFilter.hpp \
+    $$PWD/QuerySegment.hpp \
+    $$PWD/SegmentBase.hpp \
+    $$PWD/SegmentTagged.hpp \
+    $$PWD/TextRankExtractor.hpp \
+    $$PWD/Trie.hpp \
+    $$PWD/Unicode.hpp
+
+include(limonp/limonp.pri)
--- a/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ArgvContext.hpp
@ -0,0 +1,70 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+
+#ifndef LIMONP_ARGV_FUNCTS_H
+#define LIMONP_ARGV_FUNCTS_H
+
+#include <set>
+#include <sstream>
+#include "StringUtil.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+// Minimal command-line parser. Every token of argv (index 0 included) is
+// sorted into one of three buckets:
+//   - "-key value" pairs: a dash-prefixed token followed by a non-dash token,
+//   - lone dash-prefixed switches,
+//   - everything else, kept in order as positional arguments.
+class ArgvContext {
+ public :
+  ArgvContext(int argc, const char* const * argv) {
+    int pos = 0;
+    while(pos < argc) {
+      const char* const token = argv[pos];
+      ++pos;
+      if(!StartsWith(token, "-")) {
+        args_.push_back(token);
+        continue;
+      }
+      // A dash token owns the next token as its value unless that one is
+      // itself dash-prefixed (or there is no next token).
+      const bool has_value = pos < argc && !StartsWith(argv[pos], "-");
+      if(has_value) {
+        mpss_[token] = argv[pos];
+        ++pos;
+      } else {
+        sset_.insert(token);
+      }
+    }
+  }
+  ~ArgvContext() {
+  }
+
+  friend ostream& operator << (ostream& os, const ArgvContext& args);
+
+  // Positional argument at index `i`, or "" when `i` is out of range.
+  string operator [](size_t i) const {
+    return i < args_.size() ? args_[i] : string("");
+  }
+
+  // Value recorded for the "-key value" pair `key`, or "" when absent.
+  string operator [](const string& key) const {
+    map<string, string>::const_iterator found = mpss_.find(key);
+    if(found == mpss_.end()) {
+      return "";
+    }
+    return found->second;
+  }
+
+  // True when `key` was seen either with a value or as a lone switch.
+  bool HasKey(const string& key) const {
+    return mpss_.count(key) != 0 || sset_.count(key) != 0;
+  }
+
+ private:
+  vector<string> args_;        // positional arguments
+  map<string, string> mpss_;   // "-key" -> value
+  set<string> sset_;           // switches without a value
+}; // class ArgvContext
+
+// Streams the three buckets in order: positional arguments, key/value pairs,
+// then switches, each through the stream operator available for its container.
+inline ostream& operator << (ostream& os, const ArgvContext& args) {
+  os << args.args_;
+  os << args.mpss_;
+  os << args.sset_;
+  return os;
+}
+
+} // namespace limonp
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BlockingQueue.hpp
@ -0,0 +1,49 @@
+#ifndef LIMONP_BLOCKINGQUEUE_HPP
+#define LIMONP_BLOCKINGQUEUE_HPP
+
+#include <queue>
+#include "Condition.hpp"
+
+namespace limonp {
+// Unbounded FIFO guarded by a mutex. Pop() blocks on a condition variable
+// until an element is available; Push() never blocks on capacity.
+template<class T>
+class BlockingQueue: NonCopyable {
+ public:
+  BlockingQueue()
+    : mutex_(), notEmpty_(mutex_), queue_() {
+  }
+
+  // Enqueues a copy of `x` and signals one waiter.
+  void Push(const T& x) {
+    MutexLockGuard guard(mutex_);
+    queue_.push(x);
+    notEmpty_.Notify(); // signalled while still holding the lock (wait morphing)
+  }
+
+  // Removes and returns the oldest element, waiting while the queue is empty.
+  T Pop() {
+    MutexLockGuard guard(mutex_);
+    // The predicate is re-tested after every wake-up because a wake-up may be
+    // spurious.
+    for (;;) {
+      if (!queue_.empty()) {
+        break;
+      }
+      notEmpty_.Wait();
+    }
+    assert(!queue_.empty());
+    T head(queue_.front());
+    queue_.pop();
+    return head;
+  }
+
+  // Number of queued elements at the moment of the call.
+  size_t Size() const {
+    MutexLockGuard guard(mutex_);
+    return queue_.size();
+  }
+  bool Empty() const {
+    const size_t count = Size();
+    return count == 0;
+  }
+
+ private:
+  mutable MutexLock mutex_;   // mutable so the const observers can lock
+  Condition         notEmpty_;
+  std::queue<T>     queue_;
+}; // class BlockingQueue
+
+} // namespace limonp
+
+#endif // LIMONP_BLOCKINGQUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedBlockingQueue.hpp
@ -0,0 +1,67 @@
+#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
+#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
+
+#include "BoundedQueue.hpp"
+
+namespace limonp {
+
+// Fixed-capacity FIFO guarded by a mutex. Push() waits while the queue is
+// full and Pop() waits while it is empty; each side signals the other after
+// changing the queue.
+template<typename T>
+class BoundedBlockingQueue : NonCopyable {
+ public:
+  // `maxSize` is forwarded to BoundedQueue as its capacity.
+  explicit BoundedBlockingQueue(size_t maxSize)
+    : mutex_(),
+      notEmpty_(mutex_),
+      notFull_(mutex_),
+      queue_(maxSize) {
+  }
+
+  // Enqueues a copy of `x`, waiting until there is a free slot.
+  void Push(const T& x) {
+    MutexLockGuard lock(mutex_);
+    // loop rather than `if`: the condition is re-checked after every wake-up
+    while (queue_.Full()) {
+      notFull_.Wait();
+    }
+    assert(!queue_.Full());
+    queue_.Push(x);
+    notEmpty_.Notify();
+  }
+
+  // Removes and returns the oldest element, waiting until one exists.
+  T Pop() {
+    MutexLockGuard lock(mutex_);
+    while (queue_.Empty()) {
+      notEmpty_.Wait();
+    }
+    assert(!queue_.Empty());
+    T res = queue_.Pop();
+    notFull_.Notify();
+    return res;
+  }
+
+  bool Empty() const {
+    MutexLockGuard lock(mutex_);
+    return queue_.Empty();
+  }
+
+  bool Full() const {
+    MutexLockGuard lock(mutex_);
+    return queue_.Full();
+  }
+
+  // Number of queued elements at the moment of the call.
+  size_t size() const {
+    MutexLockGuard lock(mutex_);
+    // BoundedQueue spells its accessors Size()/Capacity(); the lower-case
+    // names used here before do not exist on it.
+    return queue_.Size();
+  }
+
+  // Maximum number of elements; fixed at construction, so no lock is taken.
+  size_t capacity() const {
+    return queue_.Capacity();
+  }
+
+ private:
+  mutable MutexLock          mutex_;   // mutable so the const observers can lock
+  Condition                  notEmpty_;
+  Condition                  notFull_;
+  BoundedQueue<T>  queue_;
+}; // class BoundedBlockingQueue
+
+} // namespace limonp
+
+#endif // LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/BoundedQueue.hpp
@ -0,0 +1,65 @@
+#ifndef LIMONP_BOUNDED_QUEUE_HPP
+#define LIMONP_BOUNDED_QUEUE_HPP
+
+#include <vector>
+#include <fstream>
+#include <cassert>
+
+namespace limonp {
+using namespace std;
+// Fixed-capacity FIFO stored in a circular buffer. Not synchronized. Push on
+// a full queue and Pop on an empty one are caller errors guarded by assert.
+template<class T>
+class BoundedQueue {
+ public:
+  // Allocates `capacity` slots up front; the capacity must be non-zero.
+  explicit BoundedQueue(size_t capacity)
+    : head_(0),
+      tail_(0),
+      size_(0),
+      capacity_(capacity),
+      circular_buffer_(capacity) {
+    assert(capacity_);
+  }
+  ~BoundedQueue() {
+  }
+
+  // Forgets all elements by resetting the indices; slots are not reassigned.
+  void Clear() {
+    head_ = tail_ = size_ = 0;
+  }
+  bool Empty() const {
+    return size_ == 0;
+  }
+  bool Full() const {
+    return size_ == capacity_;
+  }
+  size_t Size() const {
+    return size_;
+  }
+  size_t Capacity() const {
+    return capacity_;
+  }
+
+  // Stores a copy of `t` at the tail and advances the tail with wrap-around.
+  void Push(const T& t) {
+    assert(!Full());
+    const size_t slot = tail_;
+    circular_buffer_[slot] = t;
+    tail_ = (slot + 1) % capacity_;
+    ++size_;
+  }
+
+  // Advances the head with wrap-around and returns a copy of the element
+  // that was at the head.
+  T Pop() {
+    assert(!Empty());
+    const size_t slot = head_;
+    head_ = (slot + 1) % capacity_;
+    --size_;
+    return circular_buffer_[slot];
+  }
+
+ private:
+  size_t head_;               // index of the oldest element
+  size_t tail_;               // index of the next free slot
+  size_t size_;               // number of stored elements
+  const size_t capacity_;
+  std::vector<T> circular_buffer_;
+
+}; // class BoundedQueue
+} // namespace limonp
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Closure.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Closure.hpp
@ -0,0 +1,206 @@
+#ifndef LIMONP_CLOSURE_HPP
+#define LIMONP_CLOSURE_HPP
+
+namespace limonp {
+
+// Abstract base for deferred calls: concrete closures capture a callable and
+// its arguments, and Run() performs the call. The virtual destructor allows
+// deletion through a ClosureInterface pointer.
+class ClosureInterface {
+ public:
+  virtual ~ClosureInterface() {}
+  virtual void Run() = 0;
+};
+
+// Closure over a callable taking no arguments. `Funct` is expected to be a
+// pointer-like callable, since Run() invokes it as (*fun_)().
+template <class Funct>
+class Closure0: public ClosureInterface {
+ public:
+  // The member is initialized directly from the parameter, so Funct does not
+  // need a default constructor or an assignment operator.
+  Closure0(Funct fun)
+    : fun_(fun) {
+  }
+  virtual ~Closure0() {
+  }
+  virtual void Run() {
+    (*fun_)();
+  }
+ private:
+  Funct fun_;
+};
+
+// Closure over a pointer-like callable and one stored argument; Run() invokes
+// (*fun_)(arg1_).
+template <class Funct, class Arg1>
+class Closure1: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so neither Funct
+  // nor Arg1 needs a default constructor or an assignment operator.
+  Closure1(Funct fun, Arg1 arg1)
+    : fun_(fun), arg1_(arg1) {
+  }
+  virtual ~Closure1() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+};
+
+// Closure over a pointer-like callable and two stored arguments; Run()
+// invokes (*fun_)(arg1_, arg2_).
+template <class Funct, class Arg1, class Arg2>
+class Closure2: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so none of the
+  // types needs a default constructor or an assignment operator.
+  Closure2(Funct fun, Arg1 arg1, Arg2 arg2)
+    : fun_(fun), arg1_(arg1), arg2_(arg2) {
+  }
+  virtual ~Closure2() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_, arg2_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+};
+
+// Closure over a pointer-like callable and three stored arguments; Run()
+// invokes (*fun_)(arg1_, arg2_, arg3_).
+template <class Funct, class Arg1, class Arg2, class Arg3>
+class Closure3: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so none of the
+  // types needs a default constructor or an assignment operator.
+  Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
+    : fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
+  }
+  virtual ~Closure3() {
+  }
+  virtual void Run() {
+    (*fun_)(arg1_, arg2_, arg3_);
+  }
+ private:
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+  Arg3 arg3_;
+};
+
+// Closure over an object pointer and a pointer-to-member taking no arguments;
+// Run() invokes (p_->*fun_)(). The object is not owned and is not checked for
+// null.
+template <class Obj, class Funct>
+class ObjClosure0: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters rather than being
+  // default-constructed and then assigned.
+  ObjClosure0(Obj* p, Funct fun)
+    : p_(p), fun_(fun) {
+  }
+  virtual ~ObjClosure0() {
+  }
+  virtual void Run() {
+    (p_->*fun_)();
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+};
+
+// Closure over an object pointer, a pointer-to-member and one stored
+// argument; Run() invokes (p_->*fun_)(arg1_). The object is not owned and is
+// not checked for null.
+template <class Obj, class Funct, class Arg1>
+class ObjClosure1: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so Arg1 does not
+  // need a default constructor or an assignment operator.
+  ObjClosure1(Obj* p, Funct fun, Arg1 arg1)
+    : p_(p), fun_(fun), arg1_(arg1) {
+  }
+  virtual ~ObjClosure1() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+};
+
+// Closure over an object pointer, a pointer-to-member and two stored
+// arguments; Run() invokes (p_->*fun_)(arg1_, arg2_). The object is not owned
+// and is not checked for null.
+template <class Obj, class Funct, class Arg1, class Arg2>
+class ObjClosure2: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so the argument
+  // types do not need a default constructor or an assignment operator.
+  ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2)
+    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2) {
+  }
+  virtual ~ObjClosure2() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_, arg2_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+};
+// Closure over an object pointer, a pointer-to-member and three stored
+// arguments; Run() invokes (p_->*fun_)(arg1_, arg2_, arg3_). The object is
+// not owned and is not checked for null.
+template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
+class ObjClosure3: public ClosureInterface {
+ public:
+  // Members are initialized directly from the parameters, so the argument
+  // types do not need a default constructor or an assignment operator.
+  ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
+    : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
+  }
+  virtual ~ObjClosure3() {
+  }
+  virtual void Run() {
+    (p_->*fun_)(arg1_, arg2_, arg3_);
+  }
+ private:
+  Obj* p_;
+  Funct fun_;
+  Arg1 arg1_;
+  Arg2 arg2_;
+  Arg3 arg3_;
+};
+
+// Heap-allocates a closure for a free function taking no arguments. The
+// result comes from `new`; nothing here deletes it.
+template<class R>
+ClosureInterface* NewClosure(R (*fun)()) {
+  typedef R (*FunctionPtr)();
+  return new Closure0<FunctionPtr>(fun);
+}
+
+// Heap-allocates a closure binding a one-argument free function to `arg1`.
+// The result comes from `new`; nothing here deletes it.
+template<class R, class Arg1>
+ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
+  typedef R (*FunctionPtr)(Arg1);
+  return new Closure1<FunctionPtr, Arg1>(fun, arg1);
+}
+
+// Heap-allocates a closure binding a two-argument free function to its
+// arguments. The result comes from `new`; nothing here deletes it.
+template<class R, class Arg1, class Arg2>
+ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
+  typedef R (*FunctionPtr)(Arg1, Arg2);
+  return new Closure2<FunctionPtr, Arg1, Arg2>(fun, arg1, arg2);
+}
+
+// Heap-allocates a closure binding a three-argument free function to its
+// arguments. The result comes from `new`; nothing here deletes it.
+template<class R, class Arg1, class Arg2, class Arg3>
+ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
+  typedef R (*FunctionPtr)(Arg1, Arg2, Arg3);
+  return new Closure3<FunctionPtr, Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
+}
+
+// Heap-allocates a closure that calls the no-argument member function `fun`
+// on `obj`. The result comes from `new`; nothing here deletes it.
+template<class R, class Obj>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
+  typedef R (Obj::* MemberPtr)();
+  return new ObjClosure0<Obj, MemberPtr>(obj, fun);
+}
+
+// Heap-allocates a closure that calls the one-argument member function `fun`
+// on `obj` with `arg1`. The result comes from `new`; nothing here deletes it.
+template<class R, class Obj, class Arg1>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
+  typedef R (Obj::* MemberPtr)(Arg1);
+  return new ObjClosure1<Obj, MemberPtr, Arg1>(obj, fun, arg1);
+}
+
+// Heap-allocates a closure that calls the two-argument member function `fun`
+// on `obj`. The result comes from `new`; nothing here deletes it.
+template<class R, class Obj, class Arg1, class Arg2>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
+  typedef R (Obj::* MemberPtr)(Arg1, Arg2);
+  return new ObjClosure2<Obj, MemberPtr, Arg1, Arg2>(obj, fun, arg1, arg2);
+}
+
+// Heap-allocates a closure that calls the three-argument member function
+// `fun` on `obj`. The result comes from `new`; nothing here deletes it.
+template<class R, class Obj, class Arg1, class Arg2, class Arg3>
+ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
+  typedef R (Obj::* MemberPtr)(Arg1, Arg2, Arg3);
+  return new ObjClosure3<Obj, MemberPtr, Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
+}
+
+} // namespace limonp
+
+#endif // LIMONP_CLOSURE_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Colors.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Colors.hpp
@ -0,0 +1,31 @@
+#ifndef LIMONP_COLOR_PRINT_HPP
+#define LIMONP_COLOR_PRINT_HPP
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string>
+
+namespace limonp {
+
+using std::string;
+
+// Foreground colour codes; each value is inserted as the numeric parameter of
+// the "\033[0;<n>m" escape sequence emitted by ColorPrintln.
+enum Color {
+  BLACK  = 30,
+  RED    = 31,
+  GREEN  = 32,
+  YELLOW = 33,
+  BLUE   = 34,
+  PURPLE = 35
+}; // enum Color
+
+// printf-style helper that writes one coloured line to stdout: the colour
+// escape for `color`, the formatted message, then a reset escape and newline.
+static void ColorPrintln(enum Color color, const char * fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  printf("\033[0;%dm", color);
+  vprintf(fmt, args);
+  va_end(args);
+  // The newline is part of the reset: without it the following lines can end
+  // up inheriting the colour in some situations.
+  printf("\033[0m\n");
+}
+
+} // namespace limonp
+
+#endif // LIMONP_COLOR_PRINT_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Condition.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Condition.hpp
@ -0,0 +1,38 @@
+#ifndef LIMONP_CONDITION_HPP
+#define LIMONP_CONDITION_HPP
+
+#include "MutexLock.hpp"
+
+namespace limonp {
+
+// Thin wrapper around a pthread condition variable bound to one MutexLock.
+// Every pthread call is wrapped in XCHECK, which expects a zero (success)
+// return value.
+class Condition : NonCopyable {
+ public:
+  // Binds the condition to `mutex` (held by reference, not owned) and
+  // initializes the underlying pthread condition with default attributes.
+  explicit Condition(MutexLock& mutex)
+    : mutex_(mutex) {
+    XCHECK(!pthread_cond_init(&pcond_, NULL));
+  }
+
+  ~Condition() {
+    XCHECK(!pthread_cond_destroy(&pcond_));
+  }
+
+  // Blocks on the condition using the bound mutex.
+  // NOTE(review): pthread_cond_wait requires the caller to already hold the
+  // mutex; nothing here verifies that.
+  void Wait() {
+    XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
+  }
+
+  // Signals the condition via pthread_cond_signal.
+  void Notify() {
+    XCHECK(!pthread_cond_signal(&pcond_));
+  }
+
+  // Signals the condition via pthread_cond_broadcast.
+  void NotifyAll() {
+    XCHECK(!pthread_cond_broadcast(&pcond_));
+  }
+
+ private:
+  MutexLock& mutex_;
+  pthread_cond_t pcond_;
+}; // class Condition
+
+} // namespace limonp
+
+#endif // LIMONP_CONDITION_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Config.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Config.hpp
@ -0,0 +1,103 @@
+/************************************
+ * file enc : utf8
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_CONFIG_H
+#define LIMONP_CONFIG_H
+
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <assert.h>
+#include "StringUtil.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+// Read-only key/value configuration loaded from a text file of `key = value`
+// lines. Blank lines and lines starting with '#' are ignored.
+class Config {
+ public:
+  // Loads `filePath` immediately; see LoadFile for the accepted format.
+  explicit Config(const string& filePath) {
+    LoadFile(filePath);
+  }
+
+  // True when at least one key was loaded.
+  operator bool () {
+    return !map_.empty();
+  }
+
+  // Value stored for `key`, or `defaultvalue` when the key is absent.
+  string Get(const string& key, const string& defaultvalue) const {
+    map<string, string>::const_iterator found = map_.find(key);
+    return found == map_.end() ? defaultvalue : found->second;
+  }
+  // Integer form: the stored text is converted with atoi. A missing key or an
+  // empty value yields `defaultvalue`.
+  int Get(const string& key, int defaultvalue) const {
+    const string text = Get(key, "");
+    if(text.empty()) {
+      return defaultvalue;
+    }
+    return atoi(text.c_str());
+  }
+  // C-string view of the value for `key`; NULL when `key` is NULL or absent.
+  // The pointer refers to storage owned by this object.
+  const char* operator [] (const char* key) const {
+    if(key != NULL) {
+      map<string, string>::const_iterator found = map_.find(key);
+      if(found != map_.end()) {
+        return found->second.c_str();
+      }
+    }
+    return NULL;
+  }
+
+  // Textual dump of the whole configuration, produced by streaming this
+  // object into a string.
+  string GetConfigInfo() const {
+    string dump;
+    dump << *this;
+    return dump;
+  }
+
+ private:
+  // Parses the file line by line. Each kept line must split on "=" into
+  // exactly two fields; key and value are trimmed. Malformed lines and
+  // duplicate keys are reported on stderr and trip an assert; when asserts
+  // are compiled out the offending line is skipped.
+  void LoadFile(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    assert(ifs);
+    vector<string> fields;
+    for(string line; getline(ifs, line); ) {
+      Trim(line);
+      if(line.empty()) {
+        continue;
+      }
+      if(StartsWith(line, "#")) {
+        continue;
+      }
+      fields.clear();
+      Split(line, fields, "=");
+      if(fields.size() != 2) {
+        fprintf(stderr, "line[%s] illegal.\n", line.c_str());
+        assert(false);
+        continue;
+      }
+      string& key = fields[0];
+      string& value = fields[1];
+      Trim(key);
+      Trim(value);
+      const bool inserted = map_.insert(make_pair(key, value)).second;
+      if(!inserted) {
+        fprintf(stderr, "key[%s] already exits.\n", key.c_str());
+        assert(false);
+        continue;
+      }
+    }
+    ifs.close();
+  }
+
+  friend ostream& operator << (ostream& os, const Config& config);
+
+  map<string, string> map_;
+}; // class Config
+
+// Streams the underlying key/value map through the stream operator available
+// for map<string, string>.
+inline ostream& operator << (ostream& os, const Config& config) {
+  os << config.map_;
+  return os;
+}
+
+} // namespace limonp
+
+#endif // LIMONP_CONFIG_H
--- a/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/FileLock.hpp
@ -0,0 +1,74 @@
+#ifndef LIMONP_FILELOCK_HPP
+#define LIMONP_FILELOCK_HPP
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string>
+#include <string.h>
+#include <assert.h>
+
+namespace limonp {
+
+using std::string;
+
+// Advisory whole-file lock built on fcntl(F_SETLK). Errors are not thrown:
+// a failing call clears Ok() and stores the strerror text for Error().
+class FileLock {
+ public:
+  FileLock() : fd_(-1), ok_(true) {
+  }
+  // Closes the descriptor if one is still open.
+  ~FileLock() {
+    Close();
+  }
+  // Opens (creating if needed, mode 0644) the file that backs the lock.
+  // Must not be called while another file is open on this object.
+  void Open(const std::string& fname) {
+    assert(fd_ == -1);
+    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+    if(fd_ < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  // Closes the descriptor, if any. 0 is a valid descriptor, so the test is
+  // `>= 0`; fd_ is reset so a repeated Close() or the destructor does not
+  // close it a second time, and Open() can be called again.
+  void Close() {
+    if(fd_ >= 0) {
+      ::close(fd_);
+      fd_ = -1;
+    }
+  }
+  // Tries to take an exclusive (write) lock on the whole file. F_SETLK does
+  // not wait: if the lock is unavailable the call fails and Ok() turns false.
+  void Lock() {
+    if(LockOrUnlock(fd_, true) < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  // Releases the lock on the whole file.
+  void UnLock() {
+    if(LockOrUnlock(fd_, false) < 0) {
+      ok_ = false;
+      err_ = strerror(errno);
+    }
+  }
+  // False once any operation has failed; it is never reset to true.
+  bool Ok() const {
+    return ok_;
+  }
+  // strerror text of the most recent failure (empty if none).
+  std::string Error() const {
+    return err_;
+  }
+ private:
+  // Issues the fcntl request; returns fcntl's result (negative on failure).
+  static int LockOrUnlock(int fd, bool lock) {
+    errno = 0;
+    struct flock f;
+    memset(&f, 0, sizeof(f));
+    f.l_type = (lock ? F_WRLCK : F_UNLCK);
+    f.l_whence = SEEK_SET;
+    f.l_start = 0;
+    f.l_len = 0;        // Lock/unlock entire file
+    return fcntl(fd, F_SETLK, &f);
+  }
+
+  int fd_;           // -1 when no file is open
+  bool ok_;
+  std::string err_;
+}; // class FileLock
+
+}// namespace limonp
+
+#endif // LIMONP_FILELOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ForcePublic.hpp
@ -0,0 +1,7 @@
+#ifndef LIMONP_FORCE_PUBLIC_H
+#define LIMONP_FORCE_PUBLIC_H
+
+#define private public
+#define protected public
+
+#endif // LIMONP_FORCE_PUBLIC_H
--- a/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/LocalVector.hpp
@ -0,0 +1,139 @@
+#ifndef LIMONP_LOCAL_VECTOR_HPP
+#define LIMONP_LOCAL_VECTOR_HPP
+
+#include <iostream>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+namespace limonp {
+using namespace std;
+/*
+ * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
+ * LocalVector<T> is simple and not well-tested.
+ */
+// Number of elements stored inline before LocalVector switches to the heap.
+const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
+// Small-buffer vector: the first LOCAL_VECTOR_BUFFER_SIZE elements live in an
+// inline array, larger contents in a malloc'ed block. Elements are moved with
+// memcpy and never destroyed individually, so T must be a primitive-like type.
+template <class T>
+class LocalVector {
+ public:
+  typedef const T* const_iterator ;
+  typedef T value_type;
+  typedef size_t size_type;
+ private:
+  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];  // inline storage
+  T * ptr_;                             // buffer_ or a malloc'ed block
+  size_t size_;
+  size_t capacity_;
+ public:
+  LocalVector() {
+    init_();
+  }
+  LocalVector(const LocalVector<T>& vec) {
+    init_();
+    *this = vec;
+  }
+  // Copies the range [begin, end) element by element.
+  LocalVector(const_iterator  begin, const_iterator end) { // TODO: make it faster
+    init_();
+    while(begin != end) {
+      push_back(*begin++);
+    }
+  }
+  // Creates `size` copies of `t`.
+  LocalVector(size_t size, const T& t) { // TODO: make it faster
+    init_();
+    while(size--) {
+      push_back(t);
+    }
+  }
+  ~LocalVector() {
+    if(ptr_ != buffer_) {
+      free(ptr_);
+    }
+  }
+ public:
+  // Replaces the contents with a copy of `vec`, mirroring its storage mode
+  // (inline or heap) and its capacity.
+  LocalVector<T>& operator = (const LocalVector<T>& vec) {
+    // Self-assignment must be a no-op: clear() below would otherwise discard
+    // (and, on the heap path, free) the very data about to be copied.
+    if(this == &vec) {
+      return *this;
+    }
+    clear();
+    size_ = vec.size();
+    capacity_ = vec.capacity();
+    if(vec.buffer_ == vec.ptr_) {
+      memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
+      ptr_ = buffer_;
+    } else {
+      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
+      assert(ptr_);
+      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
+    }
+    return *this;
+  }
+ private:
+  // Resets to the empty, inline-storage state without releasing anything.
+  void init_() {
+    ptr_ = buffer_;
+    size_ = 0;
+    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
+  }
+ public:
+  // Unchecked element access.
+  T& operator [] (size_t i) {
+    return ptr_[i];
+  }
+  const T& operator [] (size_t i) const {
+    return ptr_[i];
+  }
+  // Appends `t`, doubling the capacity when the storage is full.
+  void push_back(const T& t) {
+    if(size_ == capacity_) {
+      assert(capacity_);
+      reserve(capacity_ * 2);
+    }
+    ptr_[size_ ++ ] = t;
+  }
+  // Grows the storage to hold at least `size` elements; never shrinks.
+  void reserve(size_t size) {
+    if(size <= capacity_) {
+      return;
+    }
+    T * next =  (T*)malloc(sizeof(T) * size);
+    assert(next);
+    T * old = ptr_;
+    ptr_ = next;
+    memcpy(ptr_, old, sizeof(T) * capacity_);
+    capacity_ = size;
+    if(old != buffer_) {
+      free(old);
+    }
+  }
+  bool empty() const {
+    return 0 == size();
+  }
+  size_t size() const {
+    return size_;
+  }
+  size_t capacity() const {
+    return capacity_;
+  }
+  const_iterator begin() const {
+    return ptr_;
+  }
+  const_iterator end() const {
+    return ptr_ + size_;
+  }
+  // Drops all elements, releases any heap block and returns to inline storage.
+  void clear() {
+    if(ptr_ != buffer_) {
+      free(ptr_);
+    }
+    init_();
+  }
+};
+
+// Prints the vector as ["a", "b", ...]: every element is wrapped in double
+// quotes and separated by ", ". An empty vector prints as [].
+template <class T>
+ostream & operator << (ostream& os, const LocalVector<T>& vec) {
+  if(vec.empty()) {
+    os << "[]";
+    return os;
+  }
+  // `lead` is the text written before each element: the opening bracket and
+  // quote for the first one, the closing quote plus separator afterwards.
+  const char* lead = "[\"";
+  for(size_t i = 0; i < vec.size(); i++) {
+    os << lead << vec[i];
+    lead = "\", \"";
+  }
+  os << "\"]";
+  return os;
+}
+
+}
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Logging.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Logging.hpp
@ -0,0 +1,76 @@
+#ifndef LIMONP_LOGGING_HPP
+#define LIMONP_LOGGING_HPP
+
+#include <sstream>
+#include <iostream>
+#include <cassert>
+#include <cstdlib>
+#include <ctime>
+
+#ifdef XLOG
+#error "XLOG has been defined already"
+#endif // XLOG
+#ifdef XCHECK
+#error "XCHECK has been defined already"
+#endif // XCHECK
+
+#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream() 
+#define XCHECK(exp) if(!(exp)) XLOG(FATAL) << "exp: ["#exp << "] false. "
+
+namespace limonp {
+
+// Log severities in increasing order. The numeric value is also used as the
+// index into LOG_LEVEL_ARRAY, so the values run contiguously from 0.
+enum {
+  LL_DEBUG = 0,
+  LL_INFO,
+  LL_WARNING,
+  LL_ERROR,
+  LL_FATAL
+}; // enum
+
+// Display names for the LL_* levels, indexed by the level value.
+static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
+// strftime format of the timestamp that starts every log line.
+static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
+
+// One-shot log record. The constructor writes the prefix (timestamp, file,
+// line, level name) into an internal stream, callers append the message via
+// Stream(), and the destructor writes the finished line to std::cerr. A
+// record at LL_FATAL aborts the process after being written.
+// When LOGGING_LEVEL is defined, records below it are dropped entirely.
+class Logger {
+ public:
+  // `level` is one of the LL_* values; `filename`/`lineno` identify the call
+  // site (the XLOG macro passes __FILE__ and __LINE__).
+  Logger(size_t level, const char* filename, int lineno)
+   : level_(level) {
+#ifdef LOGGING_LEVEL
+     if (level_ < LOGGING_LEVEL) {
+       return;
+     }
+#endif
+    // level_ indexes LOG_LEVEL_ARRAY below, so it has to be strictly smaller
+    // than the element count (`<=` would admit one-past-the-end).
+    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
+    char buf[32];
+    time_t now;
+    time(&now);
+    strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now));
+    stream_ << buf
+      << " " << filename
+      << ":" << lineno
+      << " " << LOG_LEVEL_ARRAY[level_]
+      << " ";
+  }
+  // Emits the accumulated line, then aborts if the record is fatal.
+  ~Logger() {
+#ifdef LOGGING_LEVEL
+     if (level_ < LOGGING_LEVEL) {
+       return;
+     }
+#endif
+    std::cerr << stream_.str() << std::endl;
+    if (level_ == LL_FATAL) {
+      abort();
+    }
+  }
+
+  // Stream to which the message text is appended.
+  std::ostream& Stream() {
+    return stream_;
+  }
+
+ private:
+  std::ostringstream stream_;
+  size_t level_;
+}; // class Logger
+
+} // namespace limonp
+
+#endif // LIMONP_LOGGING_HPP
--- a/libchinese-segmentation/cppjieba/limonp/Md5.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Md5.hpp
@ -0,0 +1,411 @@
+#ifndef __MD5_H__
+#define __MD5_H__
+
+// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+// rights reserved.
+
+// License to copy and use this software is granted provided that it
+// is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+// Algorithm" in all material mentioning or referencing this software
+// or this function.
+//
+// License is also granted to make and use derivative works provided
+// that such works are identified as "derived from the RSA Data
+// Security, Inc. MD5 Message-Digest Algorithm" in all material
+// mentioning or referencing the derived work.
+//
+// RSA Data Security, Inc. makes no representations concerning either
+// the merchantability of this software or the suitability of this
+// software for any particular purpose. It is provided "as is"
+// without express or implied warranty of any kind.
+//
+// These notices must be retained in any copies of any part of this
+// documentation and/or software.
+
+
+
+// The original md5 implementation avoids external libraries.
+// This version has dependency on stdio.h for file input and
+// string.h for memcpy.
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+
+namespace limonp {
+
+//#pragma region MD5 defines
+// Constants for MD5Transform routine.
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+
+// F, G, H and I are basic MD5 functions.
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+
+// ROTATE_LEFT rotates x left n bits.
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+// Rotation is separate from addition to prevent recomputation.
+#define FF(a, b, c, d, x, s, ac) { \
+  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) { \
+  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) { \
+  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) { \
+  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+  (a) = ROTATE_LEFT ((a), (s)); \
+  (a) += (b); \
+  }
+//#pragma endregion
+
+
+typedef unsigned char BYTE ;
+
+// POINTER defines a generic pointer type
+typedef unsigned char *POINTER;
+
+// UINT2 defines a two byte word
+typedef unsigned short int UINT2;
+
+// UINT4 defines a four byte word
+typedef unsigned int UINT4;
+
+static unsigned char PADDING[64] = {
+  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+// convenient object that wraps
+// the C-functions for use in C++ only
+// convenient object that wraps
+// the C-functions for use in C++ only
+//
+// Usage: call one of digestFile / digestMemory / digestString, which run
+// Init -> Update -> Final and return a pointer to `digestChars` (the lowercase
+// hex digest) or NULL on failure. The raw 16 bytes are left in `digestRaw`.
+// The returned pointer refers to a member of this object.
+class MD5 {
+ private:
+  struct __context_t {
+    UINT4 state[4];                                   /* state (ABCD) */
+    UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
+    unsigned char buffer[64];                         /* input buffer */
+  } context ;
+
+  //#pragma region static helper functions
+  // The core of the MD5 algorithm is here.
+  // MD5 basic transformation. Transforms state based on block.
+  // `block` is one 64-byte input block; `state` is updated in place.
+  static void MD5Transform( UINT4 state[4], unsigned char block[64] ) {
+    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+    // Unpack the 64 input bytes into sixteen little-endian 32-bit words.
+    Decode (x, block, 64);
+
+    /* Round 1 */
+    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
+    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
+    FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
+    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
+    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
+    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
+    FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
+    FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
+    FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
+    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
+    FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+    FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+    FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+    FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+    FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+    FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+    /* Round 2 */
+    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
+    GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
+    GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
+    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
+    GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
+    GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
+    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
+    GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
+    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
+    GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
+    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
+    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+    /* Round 3 */
+    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
+    HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
+    HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+    HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
+    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
+    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
+    HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+    HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
+    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
+    HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
+    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
+    HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
+
+    /* Round 4 */
+    II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
+    II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
+    II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+    II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
+    II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
+    II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+    II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
+    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
+    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+    II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
+    II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+    II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
+    II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
+    II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
+
+    // Fold this block's result into the running state.
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+
+    // Zeroize sensitive information.
+    memset((POINTER)x, 0, sizeof (x));
+  }
+
+  // Encodes input (UINT4) into output (unsigned char). Assumes len is
+  // a multiple of 4. `len` counts output bytes; each word is written
+  // least-significant byte first.
+  static void Encode( unsigned char *output, UINT4 *input, unsigned int len ) {
+    unsigned int i, j;
+
+    for (i = 0, j = 0; j < len; i++, j += 4) {
+      output[j] = (unsigned char)(input[i] & 0xff);
+      output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
+      output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
+      output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
+    }
+  }
+
+  // Decodes input (unsigned char) into output (UINT4). Assumes len is
+  // a multiple of 4. `len` counts input bytes; the inverse of Encode.
+  static void Decode( UINT4 *output, unsigned char *input, unsigned int len ) {
+    unsigned int i, j;
+
+    for (i = 0, j = 0; j < len; i++, j += 4)
+      output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
+                  (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
+  }
+  //#pragma endregion
+
+
+ public:
+  // MAIN FUNCTIONS
+  MD5() {
+    Init() ;
+  }
+
+  // MD5 initialization. Begins an MD5 operation, writing a new context.
+  // Only the bit count and state words are reset; the input buffer is not.
+  void Init() {
+    context.count[0] = context.count[1] = 0;
+
+    // Load magic initialization constants.
+    context.state[0] = 0x67452301;
+    context.state[1] = 0xefcdab89;
+    context.state[2] = 0x98badcfe;
+    context.state[3] = 0x10325476;
+  }
+
+  // MD5 block update operation. Continues an MD5 message-digest
+  // operation, processing another message block, and updating the
+  // context.
+  void Update(
+    unsigned char *input,   // input block
+    unsigned int inputLen ) { // length of input block
+    unsigned int i, index, partLen;
+
+    // Compute number of bytes mod 64
+    index = (unsigned int)((context.count[0] >> 3) & 0x3F);
+
+    // Update number of bits
+    // (count[0] is the low word; a wrap-around carries into count[1])
+    if ((context.count[0] += ((UINT4)inputLen << 3))
+        < ((UINT4)inputLen << 3))
+      context.count[1]++;
+    context.count[1] += ((UINT4)inputLen >> 29);
+
+    // Bytes needed to complete the partially filled buffer.
+    partLen = 64 - index;
+
+    // Transform as many times as possible.
+    if (inputLen >= partLen) {
+      memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
+      MD5Transform (context.state, context.buffer);
+
+      // Remaining whole blocks are hashed straight from the input.
+      for (i = partLen; i + 63 < inputLen; i += 64)
+        MD5Transform (context.state, &input[i]);
+
+      index = 0;
+    } else
+      i = 0;
+
+    /* Buffer remaining input */
+    memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen-i);
+  }
+
+  // MD5 finalization. Ends an MD5 message-digest operation, writing the
+  // the message digest and zeroizing the context.
+  // Writes to digestRaw, then fills digestChars via writeToString().
+  // Init() must be called again before the object is reused.
+  void Final() {
+    unsigned char bits[8];
+    unsigned int index, padLen;
+
+    // Save number of bits
+    Encode( bits, context.count, 8 );
+
+    // Pad out to 56 mod 64.
+    index = (unsigned int)((context.count[0] >> 3) & 0x3f);
+    padLen = (index < 56) ? (56 - index) : (120 - index);
+    Update( PADDING, padLen );
+
+    // Append length (before padding)
+    Update( bits, 8 );
+
+    // Store state in digest
+    Encode( digestRaw, context.state, 16);
+
+    // Zeroize sensitive information.
+    memset((POINTER)&context, 0, sizeof (context));
+
+    writeToString() ;
+  }
+
+  /// Buffer must be 32+1 (nul) = 33 chars long at least
+  // Formats digestRaw as 32 lowercase hex characters into digestChars; the
+  // final sprintf call writes the terminating nul.
+  void writeToString() {
+    int pos ;
+
+    for( pos = 0 ; pos < 16 ; pos++ )
+      sprintf( digestChars+(pos*2), "%02x", digestRaw[pos] ) ;
+  }
+
+
+ public:
+  // an MD5 digest is a 16-byte number (32 hex digits)
+  BYTE digestRaw[ 16 ] ;
+
+  // This version of the digest is actually
+  // a "printf'd" version of the digest.
+  char digestChars[ 33 ] ;
+
+  /// Load a file from disk and digest it
+  // Digests a file and returns the result.
+  // Returns NULL when `filename` is NULL or empty, or the file cannot be
+  // opened; otherwise returns digestChars.
+  const char* digestFile( const char *filename ) {
+    if (NULL == filename || strcmp(filename, "") == 0)
+      return NULL;
+
+    Init() ;
+
+    FILE *file;
+
+    unsigned char buffer[1024] ;
+
+    if((file = fopen (filename, "rb")) == NULL) {
+      return NULL;
+    }
+    int len;
+    // fread returns 0 at end of file (or on error), which ends the loop.
+    while( (len = fread( buffer, 1, 1024, file )) )
+      Update( buffer, len ) ;
+    Final();
+
+    fclose( file );
+
+    return digestChars ;
+  }
+
+  /// Digests a byte-array already in memory
+  // Returns NULL when `memchunk` is NULL; otherwise returns digestChars.
+  const char* digestMemory( BYTE *memchunk, int len ) {
+    if (NULL == memchunk)
+      return NULL;
+
+    Init() ;
+    Update( memchunk, len ) ;
+    Final() ;
+
+    return digestChars ;
+  }
+
+  // Digests a nul-terminated string and returns the hex digest
+  // (digestChars), or NULL when `string` is NULL. Nothing is printed.
+  const char* digestString(const char *string ) {
+    if (string == NULL)
+      return NULL;
+
+    Init() ;
+    Update( (unsigned char*)string, strlen(string) ) ;
+    Final() ;
+
+    return digestChars ;
+  }
+};
+
+// Computes the MD5 hex digest of the nul-terminated string `str` into `res`.
+// Returns true on success. On failure (NULL input or no digest produced)
+// `res` is set to the empty string and false is returned.
+inline bool md5String(const char* str, std::string& res) {
+  if (str != NULL) {
+    MD5 hasher;
+    const char* const hex = hasher.digestString(str);
+    if (hex != NULL) {
+      // copy out before `hasher`, which owns the buffer, goes out of scope
+      res = hex;
+      return true;
+    }
+  }
+  res = "";
+  return false;
+}
+
+// Computes the MD5 hex digest of the file at `filepath` into `res`.
+// Returns true on success. On failure (NULL or empty path, or no digest
+// produced) `res` is set to the empty string and false is returned.
+inline bool md5File(const char* filepath, std::string& res) {
+  const bool has_path = filepath != NULL && strcmp(filepath, "") != 0;
+  if (has_path) {
+    MD5 hasher;
+    const char* const hex = hasher.digestFile(filepath);
+    if (hex != NULL) {
+      // copy out before `hasher`, which owns the buffer, goes out of scope
+      res = hex;
+      return true;
+    }
+  }
+  res = "";
+  return false;
+}
+}
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/MutexLock.hpp
@ -0,0 +1,51 @@
+#ifndef LIMONP_MUTEX_LOCK_HPP
+#define LIMONP_MUTEX_LOCK_HPP
+
+#include <pthread.h>
+#include "NonCopyable.hpp"
+#include "Logging.hpp"
+
+namespace limonp {
+
+// Owns one pthread mutex: initialised in the constructor, destroyed in the
+// destructor. Every pthread call is wrapped in XCHECK (defined in
+// Logging.hpp), which is given the negated return code.
+class MutexLock: NonCopyable {
+ public:
+  MutexLock() {
+    XCHECK(!pthread_mutex_init(&mutex_, NULL));
+  }
+  ~MutexLock() {
+    XCHECK(!pthread_mutex_destroy(&mutex_));
+  }
+  // Exposes the raw mutex for callers that need the pthread handle itself.
+  pthread_mutex_t* GetPthreadMutex() {
+    return &mutex_;
+  }
+
+ private:
+  // Lock()/Unlock() are private: the only caller is the friend class
+  // MutexLockGuard, which pairs them in its constructor and destructor.
+  void Lock() {
+    XCHECK(!pthread_mutex_lock(&mutex_));
+  }
+  void Unlock() {
+    XCHECK(!pthread_mutex_unlock(&mutex_));
+  }
+  friend class MutexLockGuard;
+
+  pthread_mutex_t mutex_;
+}; // class MutexLock
+
+// Scoped lock: locks the referenced MutexLock on construction and unlocks it
+// on destruction. Only a reference is stored, so the MutexLock must outlive
+// the guard.
+class MutexLockGuard: NonCopyable {
+ public:
+  explicit MutexLockGuard(MutexLock & mutex)
+    : mutex_(mutex) {
+    mutex_.Lock();
+  }
+  ~MutexLockGuard() {
+    mutex_.Unlock();
+  }
+ private:
+  MutexLock & mutex_;
+}; // class MutexLockGuard
+
+// From here on, the function-like spelling `MutexLockGuard(x)` expands to
+// XCHECK(false). A named guard (`MutexLockGuard guard(m);`) is unaffected
+// because the macro only matches when `(` directly follows the name.
+// NOTE(review): presumably meant to catch the unnamed temporary
+// `MutexLockGuard(m);`, which would unlock immediately -- confirm.
+#define MutexLockGuard(x) XCHECK(false);
+
+} // namespace limonp
+
+#endif // LIMONP_MUTEX_LOCK_HPP
--- a/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/NonCopyable.hpp
@ -0,0 +1,21 @@
+/************************************
+ ************************************/
+#ifndef LIMONP_NONCOPYABLE_H
+#define LIMONP_NONCOPYABLE_H
+
+namespace limonp {
+
+// Base class that makes derived types non-copyable: the copy constructor and
+// copy assignment are declared private and never defined.
+class NonCopyable {
+  // Copy operations: private and declared only.
+  NonCopyable(const NonCopyable& );
+  const NonCopyable& operator=(const NonCopyable& );
+
+ protected:
+  // Construction and destruction are reserved for derived classes.
+  NonCopyable() {}
+  ~NonCopyable() {}
+}; // class NonCopyable
+
+} // namespace limonp
+
+#endif // LIMONP_NONCOPYABLE_H
--- a/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StdExtension.hpp
@ -0,0 +1,157 @@
+#ifndef LIMONP_STD_EXTEMSION_HPP
+#define LIMONP_STD_EXTEMSION_HPP
+
+#include <map>
+
+#ifdef __APPLE__
+#include <unordered_map>
+#include <unordered_set>
+#elif(__cplusplus >= 201103L)
+#include <unordered_map>
+#include <unordered_set>
+#elif defined _MSC_VER
+#include <unordered_map>
+#include <unordered_set>
+#else
+#include <tr1/unordered_map>
+#include <tr1/unordered_set>
+namespace std {
+using std::tr1::unordered_map;
+using std::tr1::unordered_set;
+}
+
+#endif
+
+#include <set>
+#include <string>
+#include <vector>
+#include <deque>
+#include <fstream>
+#include <sstream>
+
+namespace std {
+
+// Prints a vector as "[a, b, c]"; an empty vector prints "[]".
+template<typename T>
+ostream& operator << (ostream& os, const vector<T>& v) {
+  typename vector<T>::const_iterator cur = v.begin();
+  if(cur == v.end()) {
+    return os << "[]";
+  }
+  os << "[" << *cur;
+  for(++cur; cur != v.end(); ++cur) {
+    os << ", " << *cur;
+  }
+  return os << "]";
+}
+
+// String vectors get every element wrapped in double quotes: ["a", "b"].
+template<>
+inline ostream& operator << (ostream& os, const vector<string>& v) {
+  vector<string>::const_iterator cur = v.begin();
+  if(cur == v.end()) {
+    return os << "[]";
+  }
+  os << "[\"" << *cur;
+  for(++cur; cur != v.end(); ++cur) {
+    os << "\", \"" << *cur;
+  }
+  return os << "\"]";
+}
+
+// Prints a deque with every element quoted, whatever the element type is
+// (same layout as the vector<string> overload).
+template<typename T>
+ostream& operator << (ostream& os, const deque<T>& dq) {
+  typename deque<T>::const_iterator cur = dq.begin();
+  if(cur == dq.end()) {
+    return os << "[]";
+  }
+  os << "[\"" << *cur;
+  for(++cur; cur != dq.end(); ++cur) {
+    os << "\", \"" << *cur;
+  }
+  return os << "\"]";
+}
+
+
+// Prints a pair as "first:second".
+template<class T1, class T2>
+ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
+  return os << pr.first << ":" << pr.second;
+}
+
+
+// Replaces the contents of `str` with the streamed text form of `obj`.
+template<class T>
+string& operator << (string& str, const T& obj) {
+  stringstream buffer;
+  buffer << obj; // resolves to the matching ostream << overload
+  str = buffer.str();
+  return str;
+}
+
+// Prints a map as "{k1:v1, k2:v2}"; an empty map prints "{}".
+template<class T1, class T2>
+ostream& operator << (ostream& os, const map<T1, T2>& mp) {
+  typename map<T1, T2>::const_iterator cur = mp.begin();
+  if(cur == mp.end()) {
+    return os << "{}";
+  }
+  os << '{';
+  os << *cur;
+  for(++cur; cur != mp.end(); ++cur) {
+    os << ", " << *cur;
+  }
+  return os << '}';
+}
+// Same layout as the map overload, in the container's iteration order.
+template<class T1, class T2>
+ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
+  typename std::unordered_map<T1, T2>::const_iterator cur = mp.begin();
+  if(cur == mp.end()) {
+    return os << "{}";
+  }
+  os << '{';
+  os << *cur;
+  for(++cur; cur != mp.end(); ++cur) {
+    os << ", " << *cur;
+  }
+  os << '}';
+  return os;
+}
+
+// Prints a set as "{a, b, c}"; an empty set prints "{}".
+template<class T>
+ostream& operator << (ostream& os, const set<T>& st) {
+  typename set<T>::const_iterator cur = st.begin();
+  if(cur == st.end()) {
+    return os << "{}";
+  }
+  os << '{';
+  os << *cur;
+  for(++cur; cur != st.end(); ++cur) {
+    os << ", " << *cur;
+  }
+  return os << '}';
+}
+
+// True when `contain.find(key)` locates `key`.
+template<class KeyType, class ContainType>
+bool IsIn(const ContainType& contain, const KeyType& key) {
+  return contain.find(key) != contain.end();
+}
+
+// Reads everything remaining in `ifs` into `s`, replacing its old contents.
+template<class T>
+basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
+  istreambuf_iterator<T> first(ifs);
+  istreambuf_iterator<T> last;
+  return s.assign(first, last);
+}
+
+// Copies the characters of `s` straight into the stream buffer of `ofs`.
+template<class T>
+ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
+  copy(s.begin(), s.end(), ostreambuf_iterator<T>(ofs));
+  return ofs;
+}
+
+} // namespace std
+
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/StringUtil.hpp
@ -0,0 +1,365 @@
+/************************************
+ * file enc : ascii
+ * author   : wuyanyi09@gmail.com
+ ************************************/
+#ifndef LIMONP_STR_FUNCTS_H
+#define LIMONP_STR_FUNCTS_H
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <map>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <functional>
+#include <locale>
+#include <sstream>
+#include <sys/types.h>
+#include <iterator>
+#include <algorithm>
+#include "StdExtension.hpp"
+
+namespace limonp {
+using namespace std;
+// printf-style formatting into a std::string.
+//
+// Starts with a 256-byte buffer and retries with a larger one until
+// vsnprintf() reports that the output fitted. The result is exactly the
+// formatted text, without the terminating NUL.
+inline std::string StringFormat(const char* fmt, ...) {
+  int size = 256;
+  std::string str;
+  va_list ap;
+  while (1) {
+    str.resize(size);
+    va_start(ap, fmt);
+    // Write through &str[0]: c_str() is a const view and must not be
+    // modified.
+    int n = vsnprintf(&str[0], size, fmt, ap);
+    va_end(ap);
+    if (n > -1 && n < size) {
+      str.resize(n);
+      return str;
+    }
+    if (n > -1)
+      size = n + 1;  // exact size required, reported by vsnprintf
+    else
+      size *= 2;     // no size reported: just try a bigger buffer
+  }
+}
+
+// Streams every element of [begin, end) into `res`, separated by
+// `connector`. An empty range leaves `res` untouched.
+template<class T>
+void Join(T begin, T end, std::string& res, const std::string& connector) {
+  if(begin != end) {
+    std::stringstream joined;
+    joined << *begin;
+    for(++begin; begin != end; ++begin) {
+      joined << connector << *begin;
+    }
+    res = joined.str();
+  }
+}
+
+// Convenience overload returning the joined text ("" for an empty range).
+template<class T>
+std::string Join(T begin, T end, const std::string& connector) {
+  std::string joined;
+  Join(begin, end, joined, connector);
+  return joined;
+}
+
+// Upper-cases `str` in place and returns it.
+// Each byte is passed to toupper() as unsigned char: handing it a negative
+// char (any UTF-8 byte >= 0x80 where char is signed) is undefined behaviour.
+inline std::string& Upper(std::string& str) {
+  for(std::string::iterator it = str.begin(); it != str.end(); ++it) {
+    *it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
+  }
+  return str;
+}
+
+// Lower-cases `str` in place and returns it; same unsigned-char rule as
+// Upper().
+inline std::string& Lower(std::string& str) {
+  for(std::string::iterator it = str.begin(); it != str.end(); ++it) {
+    *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
+  }
+  return str;
+}
+
+// True when `c` is a whitespace character according to isspace().
+// Values above 0xff are rejected first because isspace() is only defined for
+// unsigned-char values.
+inline bool IsSpace(unsigned c) {
+  return c > 0xff ? false : std::isspace(c & 0xff) != 0;
+}
+
+// Removes leading whitespace from `s` in place and returns it.
+// Written as a plain loop: std::ptr_fun/std::not1 were deprecated in C++11
+// and removed from the standard in C++17/C++20.
+inline std::string& LTrim(std::string &s) {
+  std::string::size_type first = 0;
+  while(first < s.size() && IsSpace(s[first])) {
+    ++first;
+  }
+  s.erase(0, first);
+  return s;
+}
+
+// Removes trailing whitespace from `s` in place and returns it.
+inline std::string& RTrim(std::string &s) {
+  std::string::size_type last = s.size();
+  while(last > 0 && IsSpace(s[last - 1])) {
+    --last;
+  }
+  s.erase(last);
+  return s;
+}
+
+// Removes whitespace from both ends of `s` in place and returns it.
+inline std::string& Trim(std::string &s) {
+  return LTrim(RTrim(s));
+}
+
+// Removes every leading occurrence of `x` from `s` in place and returns it.
+// Uses std::string searches instead of std::bind2nd/std::not1, which were
+// removed from the standard in C++17/C++20.
+inline std::string& LTrim(std::string & s, char x) {
+  // npos (no other character found) erases the whole string.
+  s.erase(0, s.find_first_not_of(x));
+  return s;
+}
+
+// Removes every trailing occurrence of `x` from `s` in place and returns it.
+inline std::string& RTrim(std::string & s, char x) {
+  // npos + 1 wraps to 0, so a string made only of `x` is erased entirely.
+  s.erase(s.find_last_not_of(x) + 1);
+  return s;
+}
+
+// Removes `x` from both ends of `s` in place and returns it.
+inline std::string& Trim(std::string &s, char x) {
+  return LTrim(RTrim(s, x), x);
+}
+
+// Splits `src` into `res` (cleared first).
+//
+// `pattern` is a SET of delimiter characters (find_first_of), not a
+// substring. Once `maxsplit` pieces have been produced, the remainder of
+// `src` is appended as one final piece. An empty piece after a trailing
+// delimiter is not emitted.
+inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
+  res.clear();
+  size_t pos = 0;
+  while(pos < src.size()) {
+    const size_t hit = src.find_first_of(pattern, pos);
+    const bool lastPiece = (std::string::npos == hit) || (res.size() >= maxsplit);
+    if(lastPiece) {
+      res.push_back(src.substr(pos));
+      break;
+    }
+    res.push_back(src.substr(pos, hit - pos));
+    pos = hit + 1;
+  }
+}
+
+// Convenience overload returning the pieces by value.
+inline std::vector<std::string> Split(const std::string& src, const std::string& pattern, size_t maxsplit = std::string::npos) {
+  std::vector<std::string> pieces;
+  Split(src, pieces, pattern, maxsplit);
+  return pieces;
+}
+
+// True when `str` begins with `prefix` (an empty prefix always matches).
+inline bool StartsWith(const std::string& str, const std::string& prefix) {
+  const std::string::size_type n = prefix.length();
+  return n <= str.length() && str.compare(0, n, prefix) == 0;
+}
+
+// True when `str` ends with `suffix` (an empty suffix always matches).
+inline bool EndsWith(const std::string& str, const std::string& suffix) {
+  const std::string::size_type n = suffix.length();
+  return n <= str.length() && str.compare(str.length() - n, n, suffix) == 0;
+}
+
+// True when the character `ch` occurs anywhere in `str`.
+inline bool IsInStr(const std::string& str, char ch) {
+  return std::string::npos != str.find(ch);
+}
+
+// Packs two bytes into one 16-bit value: `high` fills bits 15..8 and `low`
+// fills bits 7..0. Both are treated as raw bytes, so their sign is ignored.
+inline uint16_t TwocharToUint16(char high, char low) {
+  const uint16_t upper = static_cast<uint8_t>(high);
+  const uint16_t lower = static_cast<uint8_t>(low);
+  return static_cast<uint16_t>((upper << 8) | lower);
+}
+
+// Decodes `len` bytes of UTF-8 into 16-bit code units appended to `vec`.
+//
+// Only 1-, 2- and 3-byte sequences are handled; a 4-byte lead byte or a
+// sequence cut off by `len` makes the function return false, leaving the
+// units decoded so far in `vec`. A NULL `str` returns false without touching
+// `vec`.
+// NOTE(review): continuation bytes are not validated, and any lead byte in
+// 0x80..0xdf is taken as the start of a 2-byte sequence.
+template <class Uint16Container>
+bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
+  if(str == NULL) {
+    return false;
+  }
+  vec.clear();
+  size_t pos = 0;
+  while(pos < len) {
+    const uint8_t b0 = static_cast<uint8_t>(str[pos]);
+    if(b0 < 0x80) { // 0xxxxxxx
+      vec.push_back(static_cast<uint16_t>(b0));
+      pos += 1;
+    } else if(b0 <= 0xdf && pos + 1 < len) { // 110xxxxx 10xxxxxx
+      const uint8_t b1 = static_cast<uint8_t>(str[pos + 1]);
+      vec.push_back(static_cast<uint16_t>(((b0 & 0x1f) << 6) | (b1 & 0x3f)));
+      pos += 2;
+    } else if(b0 <= 0xef && pos + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
+      const uint8_t b1 = static_cast<uint8_t>(str[pos + 1]);
+      const uint8_t b2 = static_cast<uint8_t>(str[pos + 2]);
+      vec.push_back(static_cast<uint16_t>(((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f)));
+      pos += 3;
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+// std::string convenience overload of the decoder above.
+template <class Uint16Container>
+bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
+  return Utf8ToUnicode(str.c_str(), str.size(), vec);
+}
+
+// Decodes UTF-8 text into 32-bit code points appended to `vec` (cleared
+// first). Sequences of 1 to 4 bytes are handled. Returns false on a lead
+// byte above 0xf7 or on a sequence cut off by the end of `str`; the code
+// points decoded so far stay in `vec`.
+// NOTE(review): continuation bytes are not validated, and lead bytes in
+// 0x80..0xbf are taken as the start of a 2-byte sequence.
+template <class Uint32Container>
+bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
+  vec.clear();
+  const size_t total = str.size();
+  size_t pos = 0;
+  while(pos < total) {
+    const uint8_t lead = static_cast<uint8_t>(str[pos]);
+    size_t extra;   // number of continuation bytes that follow
+    uint32_t cp;    // payload bits carried by the lead byte
+    if(lead < 0x80) {          // 0xxxxxxx: 7 bits
+      extra = 0;
+      cp = lead & 0x7f;
+    } else if(lead <= 0xdf) {  // 110xxxxx: 5 + 6 bits
+      extra = 1;
+      cp = lead & 0x1f;
+    } else if(lead <= 0xef) {  // 1110xxxx: 4 + 6 + 6 bits
+      extra = 2;
+      cp = lead & 0x0f;
+    } else if(lead <= 0xf7) {  // 11110xxx: 3 + 6 + 6 + 6 bits
+      extra = 3;
+      cp = lead & 0x07;
+    } else {
+      return false;
+    }
+    if(pos + extra >= total) {
+      return false;  // sequence runs past the end of the input
+    }
+    for(size_t k = 1; k <= extra; ++k) {
+      cp = (cp << 6) | (static_cast<uint8_t>(str[pos + k]) & 0x3f);
+    }
+    vec.push_back(cp);
+    pos += extra + 1;
+  }
+  return true;
+}
+
+// Encodes the 32-bit code points in [begin, end) as UTF-8 into `res`
+// (cleared first).
+//
+//   <= 0x7f      1 byte   0xxxxxxx
+//   <= 0x7ff     2 bytes  110xxxxx 10xxxxxx
+//   <= 0xffff    3 bytes  1110xxxx 10xxxxxx 10xxxxxx
+//   otherwise    4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+//
+// The 4-byte lead byte carries three payload bits (mask 0x07), matching the
+// decoder Utf8ToUnicode32(); a 0x03 mask would drop bit 20 and corrupt
+// U+100000..U+10FFFF. Input values are not validated.
+template <class Uint32ContainerConIter>
+void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
+  res.clear();
+  uint32_t ui;
+  while(begin != end) {
+    ui = *begin;
+    if(ui <= 0x7f) {
+      res += char(ui);
+    } else if(ui <= 0x7ff) {
+      res += char(((ui >> 6) & 0x1f) | 0xc0);
+      res += char((ui & 0x3f) | 0x80);
+    } else if(ui <= 0xffff) {
+      res += char(((ui >> 12) & 0x0f) | 0xe0);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    } else {
+      res += char(((ui >> 18) & 0x07) | 0xf0);
+      res += char(((ui >> 12) & 0x3f) | 0x80);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    }
+    ++begin;
+  }
+}
+
+// Encodes the 16-bit code units in [begin, end) as UTF-8 into `res`
+// (cleared first): 1 byte up to 0x7f, 2 bytes up to 0x7ff, 3 bytes above.
+template <class Uint16ContainerConIter>
+void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
+  res.clear();
+  for(; begin != end; ++begin) {
+    const uint16_t unit = *begin;
+    if(unit <= 0x7f) {
+      res += char(unit);
+      continue;
+    }
+    if(unit <= 0x7ff) {
+      res += char(0xc0 | ((unit >> 6) & 0x1f));
+    } else {
+      res += char(0xe0 | ((unit >> 12) & 0x0f));
+      res += char(0x80 | ((unit >> 6) & 0x3f));
+    }
+    // Every multi-byte form ends with the low six bits.
+    res += char(0x80 | (unit & 0x3f));
+  }
+}
+
+
+// Splits `len` bytes of GBK-style text into 16-bit units appended to `vec`
+// (cleared first). A byte with the high bit clear becomes one unit; a byte
+// with the high bit set is combined with the following byte as
+// (lead << 8) | trail. Returns false when a lead byte is the last byte of
+// the input; a NULL `str` yields an empty `vec` and returns true.
+template <class Uint16Container>
+bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
+  vec.clear();
+  if(str == NULL) {
+    return true;
+  }
+  for(size_t pos = 0; pos < len; ) {
+    const uint8_t lead = static_cast<uint8_t>(str[pos]);
+    if((lead & 0x80) == 0) {
+      vec.push_back(uint16_t(lead));
+      ++pos;
+      continue;
+    }
+    if(pos + 1 >= len) {
+      return false;  // dangling lead byte
+    }
+    const uint8_t trail = static_cast<uint8_t>(str[pos + 1]);
+    const uint16_t unit = static_cast<uint16_t>((lead << 8) | trail);
+    vec.push_back(unit);
+    pos += 2;
+  }
+  return true;
+}
+
+// std::string convenience overload of the splitter above.
+template <class Uint16Container>
+bool GBKTrans(const std::string& str, Uint16Container& vec) {
+  return GBKTrans(str.c_str(), str.size(), vec);
+}
+
+// Inverse direction: writes the units in [begin, end) back into `res`
+// (cleared first). A unit whose high byte has bit 7 set is emitted as two
+// bytes, any other unit as its low byte only.
+template <class Uint16ContainerConIter>
+void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
+  res.clear();
+  for(; begin != end; ++begin) {
+    const char high = ((*begin) >> 8) & 0x00ff;
+    const char low = (*begin) & 0x00ff;
+    if(high & 0x80) {
+      res += high;
+    }
+    res += low;
+  }
+}
+
+/*
+ * format example: "%Y-%m-%d %H:%M:%S"
+ */
+// Formats the current local time with strftime() and stores it in `timeStr`.
+// Outputs that do not fit in 64 bytes (including the NUL) produce "", as does
+// a failed localtime() lookup.
+// NOTE: localtime() returns shared static storage, so concurrent callers are
+// not safe.
+inline void GetTime(const std::string& format, std::string&  timeStr) {
+  time_t timeNow;
+  time(&timeNow);
+  const struct tm* local = localtime(&timeNow);
+  if(local == NULL) {
+    // strftime() must not be handed a null tm pointer.
+    timeStr.clear();
+    return;
+  }
+  // Format into a local buffer instead of writing through c_str().
+  char buffer[64];
+  const size_t len = strftime(buffer, sizeof(buffer), format.c_str(), local);
+  timeStr.assign(buffer, len);
+}
+
+// Concatenates two path fragments, inserting a "/" between them unless
+// `path1` already ends with one.
+inline std::string PathJoin(const std::string& path1, const std::string& path2) {
+  std::string joined = path1;
+  if(!EndsWith(path1, "/")) {
+    joined += "/";
+  }
+  joined += path2;
+  return joined;
+}
+
+}
+#endif
--- a/libchinese-segmentation/cppjieba/limonp/Thread.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/Thread.hpp
@ -0,0 +1,44 @@
+#ifndef LIMONP_THREAD_HPP
+#define LIMONP_THREAD_HPP
+
+#include "Logging.hpp"
+#include "NonCopyable.hpp"
+
+namespace limonp {
+
+// Base class for a pthread-backed thread: derive from it, implement Run(),
+// then call Start() and later Join(). pthread return codes are passed
+// (negated) to XCHECK from Logging.hpp.
+class IThread: NonCopyable {
+ public:
+  IThread(): isStarted(false), isJoined(false) {
+  }
+  // A thread that was started but never joined is detached here.
+  virtual ~IThread() {
+    if(isStarted && !isJoined) {
+      XCHECK(!pthread_detach(thread_));
+    }
+  };
+
+  // Body executed on the new thread.
+  virtual void Run() = 0;
+  // Creates the thread; XCHECK guards against a second Start().
+  void Start() {
+    XCHECK(!isStarted);
+    XCHECK(!pthread_create(&thread_, NULL, Worker, this));
+    isStarted = true;
+  }
+  // Waits for the thread to finish; XCHECK guards against a second Join().
+  // NOTE(review): isStarted is not checked, so Join() before Start() passes
+  // an uninitialised thread_ to pthread_join -- confirm callers never do so.
+  void Join() {
+    XCHECK(!isJoined);
+    XCHECK(!pthread_join(thread_, NULL));
+    isJoined = true;
+  }
+ private:
+  // pthread entry point: `data` is the IThread* given to pthread_create().
+  static void * Worker(void * data) {
+    IThread * ptr = (IThread* ) data;
+    ptr->Run();
+    return NULL;
+  }
+
+  pthread_t thread_;
+  bool isStarted;
+  bool isJoined;
+}; // class IThread
+
+} // namespace limonp
+
+#endif // LIMONP_THREAD_HPP
--- a/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
+++ b/libchinese-segmentation/cppjieba/limonp/ThreadPool.hpp
@ -0,0 +1,86 @@
+#ifndef LIMONP_THREAD_POOL_HPP
+#define LIMONP_THREAD_POOL_HPP
+
+#include "Thread.hpp"
+#include "BlockingQueue.hpp"
+#include "BoundedBlockingQueue.hpp"
+#include "Closure.hpp"
+
+namespace limonp {
+
+using namespace std;
+
+//class ThreadPool;
+// Fixed-size pool of worker threads fed by a bounded queue of closures.
+//
+// Usage: construct with the thread count, call Start(), Add() tasks, then
+// Stop() (the destructor also calls it). Each task is deleted by the worker
+// after it has run. A NULL entry in the queue tells one worker to exit.
+class ThreadPool: NonCopyable {
+ public:
+  // One pool thread: pops closures until it receives the NULL sentinel.
+  class Worker: public IThread {
+   public:
+    Worker(ThreadPool* pool): ptThreadPool_(pool) {
+      assert(ptThreadPool_);
+    }
+    virtual ~Worker() {
+    }
+
+    virtual void Run() {
+      while (true) {
+        ClosureInterface* closure = ptThreadPool_->queue_.Pop();
+        if (closure == NULL) {
+          break;
+        }
+        // An exception escaping a task must not take the worker down.
+        try {
+          closure->Run();
+        } catch(std::exception& e) {
+          XLOG(ERROR) << e.what();
+        } catch(...) {
+          XLOG(ERROR) << " unknown exception.";
+        }
+        delete closure;
+      }
+    }
+   private:
+    ThreadPool * ptThreadPool_;
+  }; // class Worker
+
+  // Creates `thread_num` workers (not yet running) and a queue of the same
+  // capacity.
+  ThreadPool(size_t thread_num)
+    : threads_(thread_num),
+      queue_(thread_num),
+      started_(false) {
+    assert(thread_num);
+    for(size_t i = 0; i < threads_.size(); i ++) {
+      threads_[i] = new Worker(this);
+    }
+  }
+  ~ThreadPool() {
+    Stop();
+  }
+
+  // Launches every worker thread.
+  void Start() {
+    started_ = true;
+    for(size_t i = 0; i < threads_.size(); i++) {
+      threads_[i]->Start();
+    }
+  }
+  // Shuts the pool down and frees the workers; later calls are no-ops.
+  // Workers are only signalled and joined if Start() was called: joining a
+  // thread that was never created would hand an uninitialised pthread_t to
+  // pthread_join(), and pushing sentinels into a full queue nobody drains
+  // would block.
+  void Stop() {
+    if(started_) {
+      for(size_t i = 0; i < threads_.size(); i ++) {
+        queue_.Push(NULL);
+      }
+    }
+    for(size_t i = 0; i < threads_.size(); i ++) {
+      if(started_) {
+        threads_[i]->Join();
+      }
+      delete threads_[i];
+    }
+    threads_.clear();
+    started_ = false;
+  }
+
+  // Queues a task; ownership passes to the pool, which deletes it after
+  // running it.
+  void Add(ClosureInterface* task) {
+    assert(task);
+    queue_.Push(task);
+  }
+
+ private:
+  friend class Worker;
+
+  vector<IThread*> threads_;
+  BoundedBlockingQueue<ClosureInterface*> queue_;
+  bool started_;  // true once Start() has been called
+}; // class ThreadPool
+
+} // namespace limonp
+
+#endif // LIMONP_THREAD_POOL_HPP
--- a/libchinese-segmentation/cppjieba/limonp/limonp.pri
+++ b/libchinese-segmentation/cppjieba/limonp/limonp.pri
@ -0,0 +1,22 @@
+INCLUDEPATH += $$PWD
+
+HEADERS += \
+    $$PWD/ArgvContext.hpp \
+    $$PWD/BlockingQueue.hpp \
+    $$PWD/BoundedBlockingQueue.hpp \
+    $$PWD/BoundedQueue.hpp \
+    $$PWD/Closure.hpp \
+    $$PWD/Colors.hpp \
+    $$PWD/Condition.hpp \
+    $$PWD/Config.hpp \
+    $$PWD/FileLock.hpp \
+    $$PWD/ForcePublic.hpp \
+    $$PWD/LocalVector.hpp \
+    $$PWD/Logging.hpp \
+    $$PWD/Md5.hpp \
+    $$PWD/MutexLock.hpp \
+    $$PWD/NonCopyable.hpp \
+    $$PWD/StdExtension.hpp \
+    $$PWD/StringUtil.hpp \
+    $$PWD/Thread.hpp \
+    $$PWD/ThreadPool.hpp
--- a/libchinese-segmentation/dict/README.md
+++ b/libchinese-segmentation/dict/README.md
@ -0,0 +1,31 @@
+# CppJieba字典
+
+文件后缀名代表的是词典的编码方式。
+比如filename.utf8 是 utf8编码，filename.gbk 是 gbk编码方式。
+
+
+## 分词
+
+### jieba.dict.utf8/gbk
+
+作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
+
+### hmm_model.utf8/gbk
+
+作为隐式马尔科夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
+
+__对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
+
+
+## 关键词抽取
+
+### idf.utf8
+
+IDF(Inverse Document Frequency)
+在KeywordExtractor中，使用的是经典的TF-IDF算法，所以需要这么一个词典提供IDF信息。
+
+### stop_words.utf8
+
+停用词词典
+
+
--- a/libchinese-segmentation/dict/hmm_model.utf8
+++ b/libchinese-segmentation/dict/hmm_model.utf8
--- a/libchinese-segmentation/dict/idf.utf8
+++ b/libchinese-segmentation/dict/idf.utf8
--- a/libchinese-segmentation/dict/jieba.dict.utf8
+++ b/libchinese-segmentation/dict/jieba.dict.utf8
--- a/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
+++ b/libchinese-segmentation/dict/pos_dict/char_state_tab.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_emit.utf8
--- a/libchinese-segmentation/dict/pos_dict/prob_start.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_start.utf8
@ -0,0 +1,259 @@
+#初始状态的概率
+#格式
+#状态:概率
+B,a:-4.7623052146
+B,ad:-6.68006603678
+B,ag:-3.14e+100
+B,an:-8.69708322302
+B,b:-5.01837436211
+B,bg:-3.14e+100
+B,c:-3.42388018495
+B,d:-3.97504752976
+B,df:-8.88897423083
+B,dg:-3.14e+100
+B,e:-8.56355183039
+B,en:-3.14e+100
+B,f:-5.49163041848
+B,g:-3.14e+100
+B,h:-13.53336513
+B,i:-6.11578472756
+B,in:-3.14e+100
+B,j:-5.05761912847
+B,jn:-3.14e+100
+B,k:-3.14e+100
+B,l:-4.90588358466
+B,ln:-3.14e+100
+B,m:-3.6524299819
+B,mg:-3.14e+100
+B,mq:-6.7869530014
+B,n:-1.69662577975
+B,ng:-3.14e+100
+B,nr:-2.23104959138
+B,nrfg:-5.87372217541
+B,nrt:-4.98564273352
+B,ns:-2.8228438315
+B,nt:-4.84609166818
+B,nz:-3.94698846058
+B,o:-8.43349870215
+B,p:-4.20098413209
+B,q:-6.99812385896
+B,qe:-3.14e+100
+B,qg:-3.14e+100
+B,r:-3.40981877908
+B,rg:-3.14e+100
+B,rr:-12.4347528413
+B,rz:-7.94611647157
+B,s:-5.52267359084
+B,t:-3.36474790945
+B,tg:-3.14e+100
+B,u:-9.1639172775
+B,ud:-3.14e+100
+B,ug:-3.14e+100
+B,uj:-3.14e+100
+B,ul:-3.14e+100
+B,uv:-3.14e+100
+B,uz:-3.14e+100
+B,v:-2.67405848743
+B,vd:-9.04472876024
+B,vg:-3.14e+100
+B,vi:-12.4347528413
+B,vn:-4.33156108902
+B,vq:-12.1470707689
+B,w:-3.14e+100
+B,x:-3.14e+100
+B,y:-9.84448567586
+B,yg:-3.14e+100
+B,z:-7.04568111149
+B,zg:-3.14e+100
+E,a:-3.14e+100
+E,ad:-3.14e+100
+E,ag:-3.14e+100
+E,an:-3.14e+100
+E,b:-3.14e+100
+E,bg:-3.14e+100
+E,c:-3.14e+100
+E,d:-3.14e+100
+E,df:-3.14e+100
+E,dg:-3.14e+100
+E,e:-3.14e+100
+E,en:-3.14e+100
+E,f:-3.14e+100
+E,g:-3.14e+100
+E,h:-3.14e+100
+E,i:-3.14e+100
+E,in:-3.14e+100
+E,j:-3.14e+100
+E,jn:-3.14e+100
+E,k:-3.14e+100
+E,l:-3.14e+100
+E,ln:-3.14e+100
+E,m:-3.14e+100
+E,mg:-3.14e+100
+E,mq:-3.14e+100
+E,n:-3.14e+100
+E,ng:-3.14e+100
+E,nr:-3.14e+100
+E,nrfg:-3.14e+100
+E,nrt:-3.14e+100
+E,ns:-3.14e+100
+E,nt:-3.14e+100
+E,nz:-3.14e+100
+E,o:-3.14e+100
+E,p:-3.14e+100
+E,q:-3.14e+100
+E,qe:-3.14e+100
+E,qg:-3.14e+100
+E,r:-3.14e+100
+E,rg:-3.14e+100
+E,rr:-3.14e+100
+E,rz:-3.14e+100
+E,s:-3.14e+100
+E,t:-3.14e+100
+E,tg:-3.14e+100
+E,u:-3.14e+100
+E,ud:-3.14e+100
+E,ug:-3.14e+100
+E,uj:-3.14e+100
+E,ul:-3.14e+100
+E,uv:-3.14e+100
+E,uz:-3.14e+100
+E,v:-3.14e+100
+E,vd:-3.14e+100
+E,vg:-3.14e+100
+E,vi:-3.14e+100
+E,vn:-3.14e+100
+E,vq:-3.14e+100
+E,w:-3.14e+100
+E,x:-3.14e+100
+E,y:-3.14e+100
+E,yg:-3.14e+100
+E,z:-3.14e+100
+E,zg:-3.14e+100
+M,a:-3.14e+100
+M,ad:-3.14e+100
+M,ag:-3.14e+100
+M,an:-3.14e+100
+M,b:-3.14e+100
+M,bg:-3.14e+100
+M,c:-3.14e+100
+M,d:-3.14e+100
+M,df:-3.14e+100
+M,dg:-3.14e+100
+M,e:-3.14e+100
+M,en:-3.14e+100
+M,f:-3.14e+100
+M,g:-3.14e+100
+M,h:-3.14e+100
+M,i:-3.14e+100
+M,in:-3.14e+100
+M,j:-3.14e+100
+M,jn:-3.14e+100
+M,k:-3.14e+100
+M,l:-3.14e+100
+M,ln:-3.14e+100
+M,m:-3.14e+100
+M,mg:-3.14e+100
+M,mq:-3.14e+100
+M,n:-3.14e+100
+M,ng:-3.14e+100
+M,nr:-3.14e+100
+M,nrfg:-3.14e+100
+M,nrt:-3.14e+100
+M,ns:-3.14e+100
+M,nt:-3.14e+100
+M,nz:-3.14e+100
+M,o:-3.14e+100
+M,p:-3.14e+100
+M,q:-3.14e+100
+M,qe:-3.14e+100
+M,qg:-3.14e+100
+M,r:-3.14e+100
+M,rg:-3.14e+100
+M,rr:-3.14e+100
+M,rz:-3.14e+100
+M,s:-3.14e+100
+M,t:-3.14e+100
+M,tg:-3.14e+100
+M,u:-3.14e+100
+M,ud:-3.14e+100
+M,ug:-3.14e+100
+M,uj:-3.14e+100
+M,ul:-3.14e+100
+M,uv:-3.14e+100
+M,uz:-3.14e+100
+M,v:-3.14e+100
+M,vd:-3.14e+100
+M,vg:-3.14e+100
+M,vi:-3.14e+100
+M,vn:-3.14e+100
+M,vq:-3.14e+100
+M,w:-3.14e+100
+M,x:-3.14e+100
+M,y:-3.14e+100
+M,yg:-3.14e+100
+M,z:-3.14e+100
+M,zg:-3.14e+100
+S,a:-3.90253968313
+S,ad:-11.0484584802
+S,ag:-6.95411391796
+S,an:-12.8402179494
+S,b:-6.47288876397
+S,bg:-3.14e+100
+S,c:-4.78696679586
+S,d:-3.90391976418
+S,df:-3.14e+100
+S,dg:-8.9483976513
+S,e:-5.94251300628
+S,en:-3.14e+100
+S,f:-5.19482024998
+S,g:-6.50782681533
+S,h:-8.65056320738
+S,i:-3.14e+100
+S,in:-3.14e+100
+S,j:-4.91199211964
+S,jn:-3.14e+100
+S,k:-6.94032059583
+S,l:-3.14e+100
+S,ln:-3.14e+100
+S,m:-3.26920065212
+S,mg:-10.8253149289
+S,mq:-3.14e+100
+S,n:-3.85514838976
+S,ng:-4.9134348611
+S,nr:-4.48366310396
+S,nrfg:-3.14e+100
+S,nrt:-3.14e+100
+S,ns:-3.14e+100
+S,nt:-12.1470707689
+S,nz:-3.14e+100
+S,o:-8.46446092775
+S,p:-2.98684018136
+S,q:-4.88865861826
+S,qe:-3.14e+100
+S,qg:-3.14e+100
+S,r:-2.76353367841
+S,rg:-10.2752685919
+S,rr:-3.14e+100
+S,rz:-3.14e+100
+S,s:-3.14e+100
+S,t:-3.14e+100
+S,tg:-6.27284253188
+S,u:-6.94032059583
+S,ud:-7.72823016105
+S,ug:-7.53940370266
+S,uj:-6.85251045118
+S,ul:-8.41537131755
+S,uv:-8.15808672229
+S,uz:-9.29925862537
+S,v:-3.05329230341
+S,vd:-3.14e+100
+S,vg:-5.94301818437
+S,vi:-3.14e+100
+S,vn:-11.4539235883
+S,vq:-3.14e+100
+S,w:-3.14e+100
+S,x:-8.42741965607
+S,y:-6.19707946995
+S,yg:-13.53336513
+S,z:-3.14e+100
+S,zg:-3.14e+100
--- a/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
+++ b/libchinese-segmentation/dict/pos_dict/prob_trans.utf8
--- a/libchinese-segmentation/dict/stop_words.utf8
+++ b/libchinese-segmentation/dict/stop_words.utf8
--- a/libchinese-segmentation/dict/user.dict.utf8
+++ b/libchinese-segmentation/dict/user.dict.utf8
@ -0,0 +1,4 @@
+云计算
+韩玉鉴赏
+蓝翔 nz
+区块链 10 nz
--- a/libchinese-segmentation/libchinese-segmentation.pro
+++ b/libchinese-segmentation/libchinese-segmentation.pro
@ -1,5 +1,6 @@
 QT -= gui

+#VERSION = 0.0.1
 TARGET =  chinese-segmentation
 TEMPLATE = lib
 DEFINES += LIBCHINESESEGMENTATION_LIBRARY
@ -16,7 +17,7 @@ DEFINES += QT_DEPRECATED_WARNINGS
 # In order to do so, uncomment the following line.
 # You can also select to disable deprecated APIs only up to a certain version of Qt.
 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
-#include(jieba/jieba.pri)
+include(cppjieba/cppjieba.pri)

 SOURCES += \
    chinese-segmentation.cpp \
@ -25,6 +26,13 @@ HEADERS += \
    chinese-segmentation.h \
    libchinese-segmentation_global.h

+# Install the dictionaries; no trailing "\" so each assignment stays on its own line.
+dict_files.path = /usr/share/ukui-search/res/dict/
+dict_files.files = $$PWD/dict/*.utf8
+dict_files.files += $$PWD/dict/pos_dict/*.utf8
+
+INSTALLS += dict_files
+
 # Default rules for deployment.
 unix {
    target.path = /usr/lib
@ -33,3 +41,16 @@ unix {

 #DISTFILES += \
 #    jiaba/jieba.pri
+
+DISTFILES += \
+    dict/README.md \
+    dict/hmm_model.utf8 \
+    dict/idf.utf8 \
+    dict/jieba.dict.utf8 \
+    dict/pos_dict/char_state_tab.utf8 \
+    dict/pos_dict/prob_emit.utf8 \
+    dict/pos_dict/prob_start.utf8 \
+    dict/pos_dict/prob_trans.utf8 \
+    dict/stop_words.utf8 \
+    dict/user.dict.utf8
+
--- a/libsearch/file-utils.cpp
+++ b/libsearch/file-utils.cpp
@ -398,11 +398,11 @@ QString *FileUtils::getDocxTextContent(QString &path)
    QFileInfo info = QFileInfo(path);
    if(!info.exists()||info.isDir())
        return nullptr;
-    QuaZip file("path");
-    if(file.open(QuaZip::mdUnzip))
+    QuaZip file(path);
+    if(!file.open(QuaZip::mdUnzip))
        return nullptr;

-    if(file.setCurrentFile("word/document.xml",QuaZip::csSensitive))
+    if(!file.setCurrentFile("word/document.xml",QuaZip::csSensitive))
        return nullptr;
    QuaZipFile fileR(&file);

@ -424,7 +424,6 @@ QString *FileUtils::getDocxTextContent(QString &path)
        }
        first = first.nextSiblingElement();
    }
-    qDebug()<<"size!!!"<<allText->size();
    return allText;
 }

--- a/libsearch/file-utils.h
+++ b/libsearch/file-utils.h
@ -27,8 +27,8 @@ public:

    //parse text,docx.....
    static QString getMimetype(QString &path, bool getsuffix = false);
-    static QString * getDocxTextContent(QString &path);
-    static QString * getTxtContent(QString &path);
+    static QString *getDocxTextContent(QString &path);
+    static QString *getTxtContent(QString &path);

 private:
    FileUtils();
--- a/libsearch/index/document.cpp
+++ b/libsearch/index/document.cpp
@ -23,11 +23,11 @@ void Document::setData(QString data)
    m_document->set_data(data.toStdString());
 }

-void Document::addterm(QString term)
+void Document::addterm(std::string term, int weight)
 {
-    if(term.isEmpty())
+    if(term == "")
        return;
-    m_document->add_term(term.toStdString());
+    m_document->add_term(term,weight);
 }

 void Document::addValue(QString value)
--- a/libsearch/index/document.h
+++ b/libsearch/index/document.h
@ -11,7 +11,7 @@ public:
    Document();
    ~Document();
    void setData(QString data);
-    void addterm(QString term);
+    void addterm(std::string term,int weight =1);
    void addValue(QString value);
    void setUniqueTerm(QString term);
    std::string getUniqueTerm();
--- a/libsearch/index/file-reader.cpp
+++ b/libsearch/index/file-reader.cpp
@ -10,7 +10,7 @@ QString *FileReader::getTextContent(QString path)
 {
    //获取所有文件内容
    //先分类
-    QString type =FileUtils::getMimetype(path);
+    QString type =FileUtils::getMimetype(path,true);
    if(type == "application/zip")
        return FileUtils::getDocxTextContent(path);
    else if(type == "text/plain")
--- a/libsearch/index/file-searcher.cpp
+++ b/libsearch/index/file-searcher.cpp
@ -73,6 +73,43 @@ void FileSearcher::onKeywordSearch(QString keyword, int begin, int num)

 }

+/**
+ * Runs a phrase query for @p keyword against the content index
+ * (CONTENT_INDEX_PATH) and emits contentResult() with the matches.
+ *
+ * @param keyword  Text to search for; parsed with FLAG_PHRASE.
+ * @param begin    Zero-based rank of the first match to return.
+ * @param num      Maximum number of matches to return.
+ *
+ * If Xapian throws, the error is logged and no signal is emitted.
+ */
+void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
+{
+    QStringList searchResult;
+    try
+    {
+        qDebug()<<"--content search start--";
+
+        Xapian::Database db(CONTENT_INDEX_PATH);
+        Xapian::Enquire enquire(db);
+        Xapian::QueryParser qp;
+        qp.set_default_op(Xapian::Query::OP_PHRASE);
+        qp.set_database(db);
+
+        //Create a query
+        Xapian::Query queryPhrase = qp.parse_query(keyword.toStdString(),Xapian::QueryParser::FLAG_PHRASE);
+
+        qDebug()<<QString::fromStdString(queryPhrase.get_description());
+
+        enquire.set_query(queryPhrase);
+        // get_mset(first, maxitems): the second argument is a count, not an
+        // end offset, so pass num rather than begin+num.
+        Xapian::MSet result = enquire.get_mset(begin, num);
+        qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
+        searchResult = getResult(result);
+
+        qDebug()<< "--content search finish--";
+    }
+    catch(const Xapian::Error &e)
+    {
+        qDebug() <<QString::fromStdString(e.get_description());
+        qDebug()<< "--content search finish--";
+        return;
+    }
+    Q_EMIT this->contentResult(searchResult);
+    qDebug()<<searchResult;
+}
+
 QStringList FileSearcher::getResult(Xapian::MSet &result)
 {
    //QStringList *pathTobeDelete = new QStringList;
@ -84,7 +121,6 @@ QStringList FileSearcher::getResult(Xapian::MSet &result)
    for (auto it = result.begin(); it != result.end(); ++it)
    {
        Xapian::Document doc = it.get_document();
-        qDebug()<<"value!!!!"<<QString::fromStdString(doc.get_value(1));
        std::string data = doc.get_data();
        Xapian::weight docScoreWeight = it.get_weight();
        Xapian::percent docScorePercent = it.get_percent();
--- a/libsearch/index/file-searcher.h
+++ b/libsearch/index/file-searcher.h
@ -6,6 +6,7 @@
 #include <QStandardPaths>
 #include <QVector>
 #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/index_data").toStdString()
+#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/content_index_data").toStdString()


 class FileSearcher : public QObject
@ -15,10 +16,12 @@ public:
    explicit FileSearcher(QObject *parent = nullptr);

 public Q_SLOTS:
-    void onKeywordSearch(QString keyword, int begin, int num);
+    void onKeywordSearch(QString keyword, int begin = 0, int num = 20);
+    void onKeywordSearchContent(QString keyword, int begin = 0, int num = 20);

 Q_SIGNALS:
    void result(QVector<QStringList> resultV);
+    void contentResult(QStringList resultL);
 private:
    QStringList getResult(Xapian::MSet &result);
 };
--- a/libsearch/index/filetypefilter.cpp
+++ b/libsearch/index/filetypefilter.cpp
@ -1,9 +1,10 @@
 #include <QDebug>
 #include "filetypefilter.h"
+#include "index-generator.h"

 FileTypeFilter::FileTypeFilter(const QString& path) : Traverse_BFS(path)
 {
-    this->result = new QVector<QString>();
+    this->result = new QList<QString>();
    this->Traverse();
 }

@ -27,7 +28,16 @@ void FileTypeFilter::DoSomething(const QFileInfo& fileInfo){

 }

-QVector<QString>* FileTypeFilter::getTargetFileAbsolutePath(){
+QList<QString>* FileTypeFilter::getTargetFileAbsolutePath(){
    return this->result;
 }

+
+// Ad-hoc test hook: hands this->result to IndexGenerator::creatAllIndex().
+void FileTypeFilter::Test(){
+    // Manual override for indexing one hard-coded document while debugging:
+//    this->result = new QList<QString>();
+//    this->result->append(QString("/home/zpf/桌面/DOCX 文档.docx"));
+
+    IndexGenerator::getInstance()->creatAllIndex(this->result);
+}
+
--- a/libsearch/index/filetypefilter.h
+++ b/libsearch/index/filetypefilter.h
@ -4,7 +4,7 @@
 #include <QObject>
 #include <QMimeDatabase>
 #include <QMimeType>
-#include <QVector>
+#include <QList>
 #include "traverse_bfs.h"

 class FileTypeFilter : public QObject, public Traverse_BFS
@ -14,18 +14,18 @@ public:
    explicit FileTypeFilter(const QString&);
    ~FileTypeFilter();
    virtual void DoSomething(const QFileInfo&) final;
-    QVector<QString>* getTargetFileAbsolutePath();
-
+    QList<QString>* getTargetFileAbsolutePath();
+    void Test();
 Q_SIGNALS:
 private:
-    const QVector<QString> targetFileTypeVec ={ QString(".doc"),
-                                                QString(".docx"),
-                                                QString(".ppt"),
+    const QVector<QString> targetFileTypeVec ={ /*QString(".doc"),*/
+                                                QString(".docx")/*,*/
+                                                /*QString(".ppt"),
                                                QString(".pptx"),
                                                QString(".xls"),
                                                QString(".xlsx"),
-                                                QString(".txt")};
-    QVector<QString>* result;
+                                                QString(".txt")*/};
+    QList<QString>* result;

 };

--- a/libsearch/index/index-generator.cpp
+++ b/libsearch/index/index-generator.cpp
@ -2,9 +2,11 @@
 #include <QStandardPaths>
 #include <QFileInfo>
 #include <QDebug>
+#include "chinese-segmentation.h"
 #include "file-utils.h"
 #include "index-generator.h"
 #include "chinesecharacterstopinyin.h"
+
 #include <QtConcurrent>
 #include <QFuture>

@ -28,6 +30,7 @@ bool IndexGenerator::setIndexdataPath()
    return true;
 }

+//文件名索引
 bool IndexGenerator::creatAllIndex(QList<QVector<QString> > *messageList)
 {
    HandlePathList(messageList);
@ -62,10 +65,32 @@ bool IndexGenerator::creatAllIndex(QList<QVector<QString> > *messageList)

    return true;
 }
-
-bool IndexGenerator::creatAllIndex(QVector<QString> *messageList)
+// File-content index: writes every document held in m_doc_list_content to the
+// content database. Returns false if Xapian reports an error, true otherwise.
+// NOTE(review): m_doc_list_content is presumably filled by HandlePathList()
+// from messageList - confirm against that function.
+bool IndexGenerator::creatAllIndex(QList<QString> *messageList)
 {
     HandlePathList(messageList);
+    // Commit once every commitInterval inserted documents, then once more at the end.
+    const int commitInterval = 9999;
+    try
+    {
+        for (int idx = 0; idx < m_doc_list_content->size(); ++idx)
+        {
+            insertIntoContentDatabase(m_doc_list_content->at(idx));
+
+            const bool batchFull = ((idx + 1) % commitInterval == 0);
+            if (batchFull)
+            {
+                m_database_content->commit();
+            }
+        }
+        // Final commit covers the last, possibly partial, batch.
+        m_database_content->commit();
+    }
+    catch (const Xapian::Error &err)
+    {
+        qDebug()<<"creat content Index fail!"<<QString::fromStdString(err.get_description());
+        return false;
+    }
+    // Only reached on success: drop the processed documents and notify listeners.
+    m_doc_list_content->clear();
+    Q_EMIT this->transactionFinished();
     return true;

 }
@ -97,6 +122,14 @@ void IndexGenerator::insertIntoDatabase(Document doc)
    return;
 }

+// Stores one document in the content database, keyed by the document's unique
+// term, via Xapian's replace_document().
+// The docid returned by replace_document() is not used, so it is discarded.
+void IndexGenerator::insertIntoContentDatabase(Document doc)
+{
+    m_database_content->replace_document(doc.getUniqueTerm(), doc.getXapianDocument());
+}
+
 void IndexGenerator::HandlePathList(QList<QVector<QString>> *messageList)
 {
    qDebug()<<"Begin HandlePathList!";
@ -114,7 +147,7 @@ void IndexGenerator::HandlePathList(QList<QVector<QString>> *messageList)
    return;
 }

-void IndexGenerator::HandlePathList(QVector<QString> *messageList)
+void IndexGenerator::HandlePathList(QList<QString> *messageList)
 {
    qDebug()<<"Begin HandlePathList for content index!";
    qDebug()<<messageList->size();
@ -148,10 +181,10 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)

    //多音字版
    //现加入首字母
-    QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(list.at(0)).replace(".",""));
-    for (QString& i : pinyin_text_list){
-        i.replace("", " ");
-    }
+//    QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(list.at(0)).replace(".",""));
+//    for (QString& i : pinyin_text_list){
+//        i.replace("", " ");
+//    }

    QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
 //    QString uniqueterm1 = QString::fromStdString(QCryptographicHash::hash(sourcePath.toUtf8(),QCryptographicHash::Md5).toStdString());
@ -169,10 +202,9 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
    doc.setUniqueTerm(uniqueterm);
    doc.addValue(list.at(2));
    if(list.at(2) == QString("1"))
-        qDebug()<<"value!!!"<<list.at(2);
    QStringList temp;
    temp.append(index_text);
-    temp.append(pinyin_text_list);
+//    temp.append(pinyin_text_list);
    doc.setIndexText(temp);
 //    doc.setIndexText(QStringList()<<index_text<<pinyin_text);
 //    doc.setIndexText(QStringList()<<index_text);
@ -182,15 +214,19 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)

 Document IndexGenerator::GenerateContentDocument(const QString &path)
 {
-    //构造文本索引的document
-    FileReader::getTextContent(path);
+    // Build the document for the content index: the path is stored as the
+    // document data, and each keyword extracted from the file's text is added
+    // as a term together with its weight.
+    // NOTE(review): ownership of the pointer returned by getTextContent() is
+    // not visible here - confirm whether it has to be deleted after use.
+    QString *content = FileReader::getTextContent(path);
     QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
+    // callSegement() dereferences its argument, so segmentation is skipped when
+    // no text was returned; the document then carries no keyword terms.
+    QVector<SKeyWord> term;
+    if (content != nullptr)
+    {
+        term = ChineseSegmentation::callSegement(content);
+    }
     Document doc;
     doc.setData(path);
     doc.setUniqueTerm(uniqueterm);
+    for (int i = 0; i < term.size(); ++i)
+    {
+        // The keyword weight is converted to int before being handed to addterm().
+        doc.addterm(term.at(i).word, static_cast<int>(term.at(i).weight));
+    }
     return doc;
-
-
 }

 bool IndexGenerator::isIndexdataExist()
--- a/libsearch/index/index-generator.h
+++ b/libsearch/index/index-generator.h
@ -23,7 +23,7 @@ Q_SIGNALS:
    void searchFinish();
 public Q_SLOTS:
    bool creatAllIndex(QList<QVector<QString>> *messageList);
-    bool creatAllIndex(QVector<QString> *messageList);
+    bool creatAllIndex(QList<QString> *messageList);
    bool deleteAllIndex(QStringList *pathlist);

 private:
@ -31,11 +31,12 @@ private:
    //For file name index
    void HandlePathList(QList<QVector<QString>> *messageList);
    //For file content index
-    void HandlePathList(QVector<QString> *messageList);
+    void HandlePathList(QList<QString> *messageList);
    static Document GenerateDocument(const QVector<QString> &list);
    static Document GenerateContentDocument(const QString &list);
    //add one data in database
    void insertIntoDatabase(Document doc);
+    void insertIntoContentDatabase(Document doc);
    ~IndexGenerator();

    QMap<QString,QStringList> *m_index_map;
--- a/libsearch/libsearch.cpp
+++ b/libsearch/libsearch.cpp
@ -1,5 +1,10 @@
 #include "libsearch.h"

-Libsearch::Libsearch()
+GlobalSearch::GlobalSearch()
 {
 }
+
+// Placeholder: the search itself is not implemented yet.
+// Returns an empty list so that this non-void function no longer flows off
+// its end without a return statement, which is undefined behaviour.
+QStringList GlobalSearch::fileSearch(QString keyword, int begin, int num)
+{
+    Q_UNUSED(keyword)
+    Q_UNUSED(begin)
+    Q_UNUSED(num)
+    return QStringList();
+}
--- a/libsearch/libsearch.h
+++ b/libsearch/libsearch.h
@ -7,4 +7,17 @@
 #include "settingsearch/setting-match.h"
 #include "index/inotify.h"
 #include "file-utils.h"
+
+#include "index/filetypefilter.h"
+
+// Exported class that offers search entry points as static functions.
+// Its constructor is private, so it cannot be instantiated from outside.
+class LIBSEARCH_EXPORT GlobalSearch
+{
+public:
+    // keyword: the text to search for.
+    // begin / num: NOTE(review) presumably the result offset and result count,
+    // as in FileSearcher::onKeywordSearch - confirm, including what the
+    // default num = -1 stands for.
+    static QStringList fileSearch(QString keyword, int begin = 0, int num = -1);
+
+private:
+    GlobalSearch();
+};
+
 #endif // LIBSEARCH_H
--- a/libsearch/libsearch.pro
+++ b/libsearch/libsearch.pro
@ -1,4 +1,4 @@
-QT += core gui concurrent xml
+QT += core concurrent xml

 VERSION = 0.0.1

@ -24,14 +24,16 @@ DEFINES += QT_DEPRECATED_WARNINGS

 include(index/index.pri)
 include(appsearch/appsearch.pri)
-include(settingsearch/settingsearch.pri)
+include(settingsearch/settingsearch.pri))

+LIBS += -L../libchinese-segmentation/ -lchinese-segmentation
 LIBS += -lxapian -lgsettings-qt -lquazip5

 SOURCES += \
    file-utils.cpp \
    globalsettings.cpp \
-    gobject-template.cpp
+    gobject-template.cpp \
+    libsearch.cpp

 HEADERS += \
    file-utils.h \
@ -41,7 +43,7 @@ HEADERS += \
    libsearch.h

 RESOURCES += \
-    resource1.qrc
+    resource1.qrc \



@ -51,9 +53,7 @@ unix {
 }
 !isEmpty(target.path): INSTALLS += target

-win32:CONFIG(release, debug|release): LIBS += -L$$OUT_PWD/../libchinese-segmentation/release/ -lchinese-segmentation
-else:win32:CONFIG(debug, debug|release): LIBS += -L$$OUT_PWD/../libchinese-segmentation/debug/ -lchinese-segmentation
-else:unix: LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
+

 INCLUDEPATH += $$PWD/../libchinese-segmentation
 DEPENDPATH += $$PWD/../libchinese-segmentation
--- a/src/main.cpp
+++ b/src/main.cpp
@ -31,6 +31,7 @@
 //#include "inotify-manager.h"
 #include "libsearch.h"

+
 void centerToScreen(QWidget* widget) {
    if (!widget)
      return;
@ -56,6 +57,22 @@ int main(int argc, char *argv[])
 //    qDebug() << t2;
    /*-------------InotyifyRefact Test End-----------------*/

+    /*-------------FileTypeFilter Test Start---------------*/
+//    QTime t1 = QTime::currentTime();
+//    FileTypeFilter* ftf = new FileTypeFilter("/home");
+//    ftf->Test();
+//    QTime t2 = QTime::currentTime();
+//    delete ftf;
+//    ftf = nullptr;
+//    qDebug() << t1;
+//    qDebug() << t2;
+    /*-------------FileTypeFilter Test End-----------------*/
+
+    /*-------------文本搜索 Test start-----------------*/
+//    FileSearcher *search = new FileSearcher();
+//    search->onKeywordSearchContent("麒麟");
+    /*-------------文本搜索 Test End-----------------*/
+
    qRegisterMetaType<QVector<QStringList>>("QVector<QStringList>");

    QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
--- a/src/mainwindow.cpp
+++ b/src/mainwindow.cpp
@ -47,13 +47,13 @@ MainWindow::MainWindow(QWidget *parent) :
 {
 //    FileUtils::findMultiToneWords("仇仇仇仇仇仇仇仇仇仇仇翟康宁test");
    /*-------------Inotify Test Start---------------*/
-    QTime t1 = QTime::currentTime();
-    InotifyManagerRefact* im = new InotifyManagerRefact("/home");
-    im->Traverse();
-    QTime t2 = QTime::currentTime();
-    qDebug() << t1;
-    qDebug() << t2;
-    im->start();
+//    QTime t1 = QTime::currentTime();
+//    InotifyManagerRefact* im = new InotifyManagerRefact("/home");
+//    im->Traverse();
+//    QTime t2 = QTime::currentTime();
+//    qDebug() << t1;
+//    qDebug() << t2;
+//    im->start();
    /*-------------Inotify Test End-----------------*/

    this->setWindowFlags(Qt::CustomizeWindowHint | Qt::FramelessWindowHint | Qt::X11BypassWindowManagerHint);
--- a/src/src.pro
+++ b/src/src.pro
@ -57,7 +57,7 @@ qm_files.files = res/translations/*.qm\
 INSTALLS += \
    qm_files \

-unix:!macx: LIBS += -L$$OUT_PWD/../libsearch/ -lukui-search
+unix:!macx: LIBS += -L$$OUT_PWD/../libsearch -lukui-search -L../libchinese-segmentation/ -lchinese-segmentation

 INCLUDEPATH += $$PWD/../libsearch
 DEPENDPATH += $$PWD/../libsearch
--- a/ukui-search.pro
+++ b/ukui-search.pro
@ -1,8 +1,7 @@
 TEMPLATE = subdirs
-SUBDIRS += \
-    libchinese-segmentation \
-    libsearch \
-    src
+SUBDIRS += $$PWD/libchinese-segmentation \
+           $$PWD/libsearch \
+           $$PWD/src
 # The following define makes your compiler emit warnings if you use
 # any Qt feature that has been marked deprecated (the exact warnings
 # depend on your compiler). Please consult the documentation of the
@ -14,6 +13,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
 # You can also select to disable deprecated APIs only up to a certain version of Qt.
 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0

-src.depends = libsearch
 libsearch.depends = libchinese-segmentation
+src.depends = libsearch
+
+CONFIG += ordered