diff --git a/libchinese-segmentation/chinese-segmentation.cpp b/libchinese-segmentation/chinese-segmentation.cpp
index 15c5207..fe4e95f 100644
--- a/libchinese-segmentation/chinese-segmentation.cpp
+++ b/libchinese-segmentation/chinese-segmentation.cpp
@@ -58,7 +58,7 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
 //    str.squeeze();
 
     const size_t topk = -1;
-    std::vector<cppjieba::KeywordExtractor::Word> keywordres;
+    std::vector<cppjieba::KeyWord> keywordres;
     ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
     std::string().swap(s);
     QVector<SKeyWord> vecNeeds;
@@ -72,16 +72,16 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
 
 }
 
-std::vector<cppjieba::KeywordExtractor::Word> ChineseSegmentation::callSegementStd(const std::string &str) {
+std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
 
     const size_t topk = -1;
-    std::vector<cppjieba::KeywordExtractor::Word> keywordres;
+    std::vector<cppjieba::KeyWord> keywordres;
     ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
 
     return keywordres;
 }
 
-void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw) {
+void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
     for(auto i : keywordres) {
         SKeyWord temp;
         temp.word = i.word;
diff --git a/libchinese-segmentation/chinese-segmentation.h b/libchinese-segmentation/chinese-segmentation.h
index e653f66..01e8046 100644
--- a/libchinese-segmentation/chinese-segmentation.h
+++ b/libchinese-segmentation/chinese-segmentation.h
@@ -50,8 +50,8 @@ public:
     QVector<SKeyWord> callSegement(std::string s);
     //新添加callSegementStd函数，修改返回值为std：：vector<cppjieba::KeywordExtractor::Word>并简化内部处理流程--jxx20210517
     //修改函数入参形式为引用，去掉Qstring与std::string转换代码--jxx20210519
-    std::vector<cppjieba::KeywordExtractor::Word> callSegementStd(const std::string& str);
-    void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
+    std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
+    void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
 private:
     static QMutex m_mutex;
     cppjieba::Jieba *m_jieba;
diff --git a/libchinese-segmentation/cppjieba/FullSegment.hpp b/libchinese-segmentation/cppjieba/FullSegment.hpp
index 6eedbd9..1652b75 100644
--- a/libchinese-segmentation/cppjieba/FullSegment.hpp
+++ b/libchinese-segmentation/cppjieba/FullSegment.hpp
@@ -47,7 +47,10 @@ public:
                      size_t) const override {
 
     }
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override { // No-op override of the keyword-map overload: res is left untouched.
 
+    }
 private:
     const DictTrie* dictTrie_;
 };
diff --git a/libchinese-segmentation/cppjieba/HMMSegment.hpp b/libchinese-segmentation/cppjieba/HMMSegment.hpp
index 1e5d08c..1a9937b 100644
--- a/libchinese-segmentation/cppjieba/HMMSegment.hpp
+++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp
@@ -21,7 +21,7 @@ public:
         RuneStrArray::const_iterator right = begin;
 
         while (right != end) {
-            if (right->rune < 0x80) {
+            if (right->rune < 0x80) { //asc码
                 if (left != right) {
                     InternalCut(left, right, res);
                 }
@@ -29,13 +29,13 @@ public:
                 left = right;
 
                 do {
-                    right = SequentialLetterRule(left, end);
+                    right = SequentialLetterRule(left, end);//非英文字符则返回left，否则返回left后非英文字母的位置
 
                     if (right != left) {
                         break;
                     }
 
-                    right = NumbersRule(left, end);
+                    right = NumbersRule(left, end);//非数字则返回left，否则返回left后非数字的位置
 
                     if (right != left) {
                         break;
@@ -61,7 +61,10 @@ public:
                      size_t) const override {
 
     }
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override { // No-op override of the keyword-map overload: res is left untouched.
 
+    }
 private:
     // sequential letters rule
     RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
@@ -135,8 +138,10 @@ private:
         size_t now, old, stat;
         double tmp, endE, endS;
 
-        vector<int> path(XYSize);
-        vector<double> weight(XYSize);
+        // Heap-backed on purpose: XYSize is presumably derived from the input
+        // length, and a runtime-sized stack array is a non-standard extension.
+        vector<int> path(XYSize);
+        vector<double> weight(XYSize);
 
         //start
         for (size_t y = 0; y < Y; y++) {
diff --git a/libchinese-segmentation/cppjieba/Jieba.hpp b/libchinese-segmentation/cppjieba/Jieba.hpp
index c862fd8..c017bd6 100644
--- a/libchinese-segmentation/cppjieba/Jieba.hpp
+++ b/libchinese-segmentation/cppjieba/Jieba.hpp
@@ -18,9 +18,9 @@ public:
           model_(model_path),
           mp_seg_(&dict_trie_),
           hmm_seg_(&model_),
-          mix_seg_(&dict_trie_, &model_),
+          mix_seg_(&dict_trie_, &model_, stopWordPath),
           full_seg_(&dict_trie_),
-          query_seg_(&dict_trie_, &model_),
+          query_seg_(&dict_trie_, &model_, stopWordPath),
           extractor(&dict_trie_, &model_, idfPath, stopWordPath){ }
     ~Jieba() { }
 
diff --git a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
index 3bcbc54..f87ad5f 100644
--- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
+++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <cmath>
-#include <set>
 #include "MixSegment.hpp"
 
 namespace cppjieba {
@@ -12,25 +11,24 @@ using namespace std;
 /*utf8*/
 class KeywordExtractor {
 public:
-    struct Word {
-        string word;
-        vector<size_t> offsets;
-        double weight;
-    }; // struct Word
+//    struct Word {
+//        string word;
+//        vector<size_t> offsets;
+//        double weight;
+//    }; // struct Word
 
     KeywordExtractor(const DictTrie* dictTrie,
                      const HMMModel* model,
                      const string& idfPath,
                      const string& stopWordPath)
-        : segment_(dictTrie, model) {
+        : segment_(dictTrie, model, stopWordPath) {
         LoadIdfDict(idfPath);
-        LoadStopWordDict(stopWordPath);
     }
     ~KeywordExtractor() {
     }
 
     void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-        vector<Word> topWords;
+        vector<KeyWord> topWords;
         Extract(sentence, topWords, topN);
 
         for (size_t i = 0; i < topWords.size(); i++) {
@@ -39,7 +37,7 @@ public:
     }
 
     void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-        vector<Word> topWords;
+        vector<KeyWord> topWords;
         Extract(sentence, topWords, topN);
 
         for (size_t i = 0; i < topWords.size(); i++) {
@@ -47,34 +45,24 @@ public:
         }
     }
 
-    void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
-        vector<string> words;
-        segment_.CutToStr(sentence, words);//将字符串string分解为words放入vector
+    void Extract(const string& sentence, vector<KeyWord>& keywords, size_t topN) const {
 
-        map<string, Word> wordmap;//插入字符串与Word的map，相同string统计词频叠加权重
-        size_t offset = 0;
-
-        for (size_t i = 0; i < words.size(); ++i) {
-            size_t t = offset;
-            offset += words[i].size();
-
-            if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
+        unordered_map<string, KeyWord> wordmap;//插入字符串与Word的map，相同string统计词频叠加权重
+        PreFilter pre_filter(symbols_, sentence);
+        RuneStrArray::const_iterator null_p;
+        WordRange range(null_p, null_p);
+        bool isNull(false);
+        while (pre_filter.Next(range, isNull)) {
+            if (isNull) {
                 continue;
             }
-
-            wordmap[words[i]].offsets.push_back(t);
-            wordmap[words[i]].weight += 1.0;
-        }
-
-        if (offset != sentence.size()) {
-            XLOG(ERROR) << "words illegal";
-            return;
+            segment_.CutToStr(sentence, range,  wordmap);
         }
 
         keywords.clear();
         keywords.reserve(wordmap.size());
 
-        for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+        for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
             unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);//IDF词典查找
 
             if (cit != idfMap_.end()) {
@@ -129,22 +117,8 @@ private:
         idfAverage_ = idfSum / lineno;
         assert(idfAverage_ > 0.0);
     }
-    void LoadStopWordDict(const string& filePath) {
-        ifstream ifs(filePath.c_str());
-        if(not ifs.is_open()){
-            return ;
-        }
-        XCHECK(ifs.is_open()) << "open " << filePath << " failed";
-        string line ;
 
-        while (getline(ifs, line)) {
-            stopWords_.insert(line);
-        }
-
-        assert(stopWords_.size());
-    }
-
-    static bool Compare(const Word& lhs, const Word& rhs) {
+    static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
         return lhs.weight > rhs.weight;
     }
 
@@ -152,10 +126,10 @@ private:
     unordered_map<string, double> idfMap_;
     double idfAverage_;
 
-    unordered_set<string> stopWords_;
+    unordered_set<Rune> symbols_;
 }; // class KeywordExtractor
 
-inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
+inline ostream& operator << (ostream& os, const KeyWord& word) {
     return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
            "}";
 }
diff --git a/libchinese-segmentation/cppjieba/MPSegment.hpp b/libchinese-segmentation/cppjieba/MPSegment.hpp
index 149af03..d615fe2 100644
--- a/libchinese-segmentation/cppjieba/MPSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MPSegment.hpp
@@ -32,7 +32,10 @@ public:
                      size_t) const override {
 
     }
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override { // No-op override of the keyword-map overload: res is left untouched.
 
+    }
     const DictTrie* GetDictTrie() const override {
         return dictTrie_;
     }
@@ -46,13 +49,14 @@ public:
     }
 private:
     void CalcDP(vector<DatDag>& dags) const {
+        double val(0);
         for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
             rit->max_next = -1;
             rit->max_weight = MIN_DOUBLE;
 
             for (const auto & it : rit->nexts) {
                 const auto nextPos = it.first;
-                double val = dictTrie_->GetMinWeight();
+                val = dictTrie_->GetMinWeight();
 
                 if (nullptr != it.second) {
                     val = it.second->weight;
diff --git a/libchinese-segmentation/cppjieba/MixSegment.hpp b/libchinese-segmentation/cppjieba/MixSegment.hpp
index 489df4f..4c93748 100644
--- a/libchinese-segmentation/cppjieba/MixSegment.hpp
+++ b/libchinese-segmentation/cppjieba/MixSegment.hpp
@@ -9,8 +9,11 @@
 namespace cppjieba {
 class MixSegment: public SegmentTagged {
 public:
-    MixSegment(const DictTrie* dictTrie, const HMMModel* model)
+    MixSegment(const DictTrie* dictTrie,
+               const HMMModel* model,
+               const string& stopWordPath)
         : mpSeg_(dictTrie), hmmSeg_(model) {
+        LoadStopWordDict(stopWordPath);
     }
     ~MixSegment() {}
 
@@ -81,16 +84,20 @@ public:
 
         for (size_t i = 0; i < words.size(); i++) {
             //if mp Get a word, it's ok, put it into result
-            if (words[i].left != words[i].right || (words[i].left == words[i].right &&
-                                                    mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
+            if (words[i].left != words[i].right) {
+                res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
+                continue;
+            }
+            if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
+                    || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
                 res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
                 continue;
             }
 
             // if mp Get a single one and it is not in userdict, collect it in sequence
-            size_t j = i;
+            size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里（i字符不是最后一个字符），直接判定j字符
 
-            while (j < words.size() && words[j].left == words[j].right &&
+            while (j < (words.size() - 1) && words[j].left == words[j].right &&
                    !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
                 j++;
             }
@@ -113,6 +120,70 @@ public:
         }
     }
 
+    // Keyword-oriented cut: segments [begin, end) of sentence s and records every
+    // kept word in res, appending the occurrence's offset to KeyWord::offsets and
+    // adding 1.0 to KeyWord::weight. Stop words are dropped, and so are the words
+    // from the HMM pass that IsSingleWord() rejects.
+    // NOTE(review): the hmm flag is not consulted; the HMM pass always runs.
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override {
+        assert(end >= begin);
+
+        vector<WordRange> words;
+        words.reserve(end - begin);
+        mpSeg_.CutRuneArray(begin, end, words);
+
+        vector<WordRange> hmmRes;
+        hmmRes.reserve(end - begin);
+
+        for (size_t i = 0; i < words.size(); i++) {
+            const WordRange& cur = words[i];
+            const string str = GetStringFromRunes(s, cur.left, cur.right);
+
+            // Checked once, up front: a stop word is never counted and never
+            // starts an HMM run.
+            if (stopWords_.find(str) != stopWords_.end()) {
+                continue;
+            }
+
+            // Counted as is: a multi-rune word, a single rune that is a user
+            // dictionary word, or the last word of the range.
+            if (cur.left != cur.right
+                    || mpSeg_.IsUserDictSingleChineseWord(cur.left->rune)
+                    || i == (words.size() - 1)) {
+                KeyWord& entry = res[str]; // single lookup; inserts on first use
+                entry.offsets.push_back(cur.left->offset);
+                entry.weight += 1.0;
+                continue;
+            }
+
+            // Otherwise gather the run of single runes that are not user
+            // dictionary words. The bound keeps the last word out of the run.
+            size_t j = i + 1;
+            while (j < (words.size() - 1) && words[j].left == words[j].right &&
+                   !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
+                j++;
+            }
+
+            // Re-cut words[i] .. words[j - 1] with the HMM segmenter.
+            assert(j - 1 >= i);
+            hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
+
+            for (size_t k = 0; k < hmmRes.size(); k++) {
+                const string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
+                if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
+                    continue;
+                }
+                KeyWord& entry = res[hmmStr];
+                entry.offsets.push_back(hmmRes[k].left->offset);
+                entry.weight += 1.0;
+            }
+
+            hmmRes.clear(); // reuse the buffer for the next run
+            i = j - 1;      // the loop's i++ resumes right after the run
+        }
+    }
+
     const DictTrie* GetDictTrie() const override {
         return mpSeg_.GetDictTrie();
     }
@@ -125,7 +196,23 @@ public:
         return tagger_.LookupTag(str, *this);
     }
 
+    // Loads stop words from filePath, one per line, into stopWords_.
+    // A file that cannot be opened is skipped silently, leaving the set as is.
+    void LoadStopWordDict(const string& filePath) {
+        ifstream ifs(filePath.c_str());
+        if (!ifs.is_open()) {
+            return;
+        }
+
+        string line;
+        while (getline(ifs, line)) {
+            stopWords_.insert(line);
+        }
+        assert(stopWords_.size()); // an opened file must yield at least one stop word
+    }
 private:
+    unordered_set<string> stopWords_;
+
     MPSegment mpSeg_;
     HMMSegment hmmSeg_;
     PosTagger tagger_;
diff --git a/libchinese-segmentation/cppjieba/PreFilter.hpp b/libchinese-segmentation/cppjieba/PreFilter.hpp
index 4830f2f..1a75a57 100644
--- a/libchinese-segmentation/cppjieba/PreFilter.hpp
+++ b/libchinese-segmentation/cppjieba/PreFilter.hpp
@@ -22,6 +22,73 @@ public:
     bool HasNext() const {
         return cursor_ != sentence_.end();
     }
+    // Yields the next range: any leading spaces (0x20) followed by one run of
+    // non-space runes. wordRange.left is the cursor on entry and wordRange.right
+    // is the space that ends the run, or sentence_.end().
+    // Returns false once the cursor has reached the end of the sentence.
+    bool Next(WordRange& wordRange) {
+        if (cursor_ == sentence_.end()) {
+            return false;
+        }
+
+        wordRange.left = cursor_;
+
+        // Skip leading spaces. The end test must come first: dereferencing the
+        // cursor at end() is undefined behaviour (input ending in spaces).
+        while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
+            ++cursor_;
+        }
+
+        // The cursor is now on the first rune of the word, if any; step past it
+        // and stop at the next space or at the end of the sentence.
+        if (cursor_ != sentence_.end()) {
+            do {
+                ++cursor_;
+            } while (cursor_ != sentence_.end() && cursor_->rune != 0x20);
+        }
+        wordRange.right = cursor_;
+        return true;
+    }
+
+    // Yields the next range of the sentence, which is split into alternating
+    // runs of spaces (0x20) and runs of non-space runes.
+    //
+    // wordRange receives [left, right): left is the cursor on entry and right
+    // is the first rune after the run (or sentence_.end()).
+    // isNull is set to true when the range holds only spaces, so the caller
+    // can skip it; this includes a run of trailing spaces at the very end of
+    // the sentence.
+    // Returns false, leaving wordRange untouched, once the cursor has reached
+    // the end of the sentence.
+    bool Next(WordRange& wordRange, bool& isNull) {
+        isNull = false;
+        if (cursor_ == sentence_.end()) {
+            return false;
+        }
+
+        wordRange.left = cursor_;
+
+        if (cursor_->rune == 0x20) {
+            // Whitespace run: consume every consecutive space. The end test
+            // comes first so the cursor is never dereferenced at end().
+            while (cursor_ != sentence_.end() && cursor_->rune == 0x20) {
+                ++cursor_;
+            }
+
+            wordRange.right = cursor_;
+            isNull = true;
+            return true;
+        }
+
+        // Word run: advance to the next space or to the end of the sentence.
+        while (cursor_ != sentence_.end() && cursor_->rune != 0x20) {
+            ++cursor_;
+        }
+
+        wordRange.right = cursor_;
+        return true;
+    }
+
     WordRange Next() {
         WordRange range(cursor_, cursor_);
 
diff --git a/libchinese-segmentation/cppjieba/QuerySegment.hpp b/libchinese-segmentation/cppjieba/QuerySegment.hpp
index 1a8db0e..9db0b97 100644
--- a/libchinese-segmentation/cppjieba/QuerySegment.hpp
+++ b/libchinese-segmentation/cppjieba/QuerySegment.hpp
@@ -14,8 +14,10 @@
 namespace cppjieba {
 class QuerySegment: public SegmentBase {
 public:
-    QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
-        : mixSeg_(dictTrie, model), trie_(dictTrie) {
+    QuerySegment(const DictTrie* dictTrie,
+                 const HMMModel* model,
+                 const string& stopWordPath)
+        : mixSeg_(dictTrie, model, stopWordPath), trie_(dictTrie) {
     }
     ~QuerySegment() {
     }
@@ -59,7 +61,10 @@ public:
                      size_t) const override {
 
     }
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t) const override { // No-op override of the keyword-map overload: res is left untouched.
 
+    }
 private:
     bool IsAllAscii(const RuneArray& s) const {
         for (size_t i = 0; i < s.size(); i++) {
diff --git a/libchinese-segmentation/cppjieba/SegmentBase.hpp b/libchinese-segmentation/cppjieba/SegmentBase.hpp
index eff78ea..942e0bd 100644
--- a/libchinese-segmentation/cppjieba/SegmentBase.hpp
+++ b/libchinese-segmentation/cppjieba/SegmentBase.hpp
@@ -23,23 +23,28 @@ public:
     //添加基于sentence的cut方法，减少中间变量的存储与格式转换--jxx20210517
     virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
                      size_t max_word_len) const = 0;
+    virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
+                     size_t max_word_len) const = 0;
     //重写CutToStr函数，简化获取vector<string>& words的流程，降低内存占用--jxx20210517
     void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
                   size_t max_word_len = MAX_WORD_LENGTH) const {
-/*
-        vector<Word> tmp;
-        CutToWord(sentence, tmp, hmm, max_word_len);
-        GetStringsFromWords(tmp, words);
-*/
         PreFilter pre_filter(symbols_, sentence);
         words.clear();
         words.reserve(sentence.size() / 2);//todo 参考源码，参数待定
-        while (pre_filter.HasNext()) {
-            auto range = pre_filter.Next();
+        RuneStrArray::const_iterator null_p;
+        WordRange range(null_p, null_p);
+        while (pre_filter.Next(range)) {
             CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
         }
     }
-
+    void CutToStr(const string& sentence, WordRange range, vector<string>& words, bool hmm = true,
+                  size_t max_word_len = MAX_WORD_LENGTH) const { // Cuts only [range.left, range.right) of sentence; words is not cleared here.
+        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
+    }
+    void CutToStr(const string& sentence, WordRange range, unordered_map<string, KeyWord>& words, bool hmm = true,
+                  size_t max_word_len = MAX_WORD_LENGTH) const { // Keyword-map variant: forwards one range to the unordered_map overload of CutWithSentence.
+        CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
+    }
     void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
                    size_t max_word_len = MAX_WORD_LENGTH) const {
         PreFilter pre_filter(symbols_, sentence);
diff --git a/libchinese-segmentation/cppjieba/Unicode.hpp b/libchinese-segmentation/cppjieba/Unicode.hpp
index a4d765e..d77b5dd 100644
--- a/libchinese-segmentation/cppjieba/Unicode.hpp
+++ b/libchinese-segmentation/cppjieba/Unicode.hpp
@@ -15,6 +15,12 @@ using std::vector;
 
 typedef uint32_t Rune;
 
+struct KeyWord {
+    string word;
+    vector<size_t> offsets; // offset in the source text of each occurrence of the word
+    double weight; // no default value: indeterminate unless the object is value-initialized
+}; // struct KeyWord
+
 struct Word {
     string word;
     uint32_t offset;
@@ -63,7 +69,7 @@ struct WordRange {
         : left(l), right(r) {
     }
     size_t Length() const {
-        return right - left + 1;
+        return right - left; // NOTE(review): half-open length, yet MixSegment and GetStringFromRunes use an inclusive right (left == right is one rune) - confirm callers.
     }
 
     bool IsAllAscii() const {
@@ -113,11 +119,13 @@ inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
     uint32_t tmp;
     uint32_t offset = 0;
     runes.clear();
-    for(size_t i = 0; i < s.size();) {
-      if(!(s.data()[i] & 0x80)) { // 0xxxxxxx
+    uint32_t len(0);
+    for (size_t i = 0; i < s.size();) {
+      if (!(s.data()[i] & 0x80)) { // 0xxxxxxx
         // 7bit, total 7bit
         tmp = (uint8_t)(s.data()[i]) & 0x7f;
         i++;
+        len = 1;
       } else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
         // 5bit, total 5bit
         tmp = (uint8_t)(s.data()[i]) & 0x1f;
@@ -126,6 +134,7 @@ inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
         tmp <<= 6;
         tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
         i += 2;
+        len = 2;
       } else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
         // 4bit, total 4bit
         tmp = (uint8_t)(s.data()[i]) & 0x0f;
@@ -139,6 +148,7 @@ inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
         tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
 
         i += 3;
+        len = 3;
       } else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
         // 3bit, total 3bit
         tmp = (uint8_t)(s.data()[i]) & 0x07;
@@ -156,10 +166,10 @@ inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
         tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
 
         i += 4;
+        len = 4;
       } else {
         return false;
       }
-      uint32_t len = limonp::UnicodeToUtf8Bytes(tmp);
       RuneInfo x(tmp, offset, len, i, 1);
       runes.push_back(x);
       offset += len;
@@ -241,9 +251,8 @@ inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left,
 
 inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
     assert(right->offset >= left->offset);
-    uint32_t len = right->offset - left->offset + right->len;
-    uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
-    return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length).word;
+    // right is inclusive: the copy runs from left's first byte through right's last byte.
+    return s.substr(left->offset, right->offset - left->offset + right->len);
 }
 
 inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp
index e54ff84..584fddc 100644
--- a/libsearch/index/construct-document.cpp
+++ b/libsearch/index/construct-document.cpp
@@ -120,8 +120,7 @@ void ConstructDocumentForContent::run() {
     content = content.replace("\t", " ").replace("\xEF\xBC\x8C", "   ").replace("\xE3\x80\x82", "   ");
 
 //    QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
-    //修改函数返回类型，修改入参为std::string引用--jxx20210519
-    std::vector<cppjieba::KeywordExtractor::Word> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
+    std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
 
     for(size_t i = 0; i < term.size(); ++i) {
         doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
diff --git a/libsearch/index/file-reader.cpp b/libsearch/index/file-reader.cpp
index f146981..e409374 100644
--- a/libsearch/index/file-reader.cpp
+++ b/libsearch/index/file-reader.cpp
@@ -31,9 +31,8 @@ void FileReader::getTextContent(QString path, QString &textContent) {
     QFileInfo file(path);
     QString strsfx =  file.suffix();
     if(name == "application/zip") {
-        if(strsfx.endsWith("docx")){
+        if(strsfx.endsWith("docx"))
             FileUtils::getDocxTextContent(path, textContent);
-        }
         if(strsfx.endsWith("pptx"))
             FileUtils::getPptxTextContent(path, textContent);
         if(strsfx.endsWith("xlsx"))
diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp
index f94b625..d23ba88 100644
--- a/libsearch/index/first-index.cpp
+++ b/libsearch/index/first-index.cpp
@@ -49,7 +49,7 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
     this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
     if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
         //this->q_content_index->enqueue(fileInfo.absoluteFilePath());
-        if(fileInfo.fileName().split(".").last() == "docx"){
+        if (fileInfo.fileName().split(".").last() == "docx") {
             QuaZip file(fileInfo.absoluteFilePath());
             if(!file.open(QuaZip::mdUnzip))
                 return;
@@ -57,10 +57,8 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
                 return;
             QuaZipFile fileR(&file);
             this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
-            qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
-            qDebug() << "文件大小:" << fileR.usize();
             file.close();
-        }else if(fileInfo.fileName().split(".").last() == "pptx"){
+        } else if (fileInfo.fileName().split(".").last() == "pptx") {
             QuaZip file(fileInfo.absoluteFilePath());
             if(!file.open(QuaZip::mdUnzip))
                 return;
@@ -79,10 +77,8 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
                 }
             }
             file.close();
-            qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
-            qDebug() << "文件大小:" << fileSize;
             this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
-        }else if(fileInfo.fileName().split(".").last() == "xlsx"){
+        } else if (fileInfo.fileName().split(".").last() == "xlsx") {
             QuaZip file(fileInfo.absoluteFilePath());
             if(!file.open(QuaZip::mdUnzip))
                 return;
@@ -90,10 +86,8 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
                 return;
             QuaZipFile fileR(&file);
             this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
-            qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
-            qDebug() << "文件大小:" << fileR.usize();
             file.close();
-        }else{
+        } else {
             this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
         }
     }
@@ -220,9 +214,17 @@ void FirstIndex::run() {
                 //                for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
                 qint64 fileSize = 0;
                 //修改一次处理的数据量，从30个文件改为文件总大小为50M以下，50M为暂定值--jxx20210519
-                for(size_t i = 0;/* (i < 30) && */(fileSize < 50*1024*1024) && (!this->q_content_index->empty()); ++i) {
+                for(size_t i = 0;/* (i < 30) && (fileSize < 52428800) && */(!this->q_content_index->empty()); ++i) {
                     QPair<QString,qint64> tempPair = this->q_content_index->dequeue();
                     fileSize += tempPair.second;
+                    if (fileSize > 52428800 ) {
+                        if (tmp2->size() == 0) {
+                            tmp2->enqueue(tempPair.first);
+                            break;
+                        }
+                        this->q_content_index->enqueue(tempPair);
+                        break;
+                    }
                     tmp2->enqueue(tempPair.first);
                 }
                 //                qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << "<<<<<<<<<<<<<<<<<<<";