From 91705118f8b79336b1ba8554827ae843ecc93cc0 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Fri, 18 Jun 2021 17:28:27 +0800 Subject: [PATCH 01/20] Remove black list check when add one(under '/home' or not). --- libsearch/global-settings.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libsearch/global-settings.cpp b/libsearch/global-settings.cpp index a46e857..cba42e5 100644 --- a/libsearch/global-settings.cpp +++ b/libsearch/global-settings.cpp @@ -146,11 +146,11 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem m_block_dirs_settings->remove(path); return true; } - if(!path.startsWith("/home")) { +// if(!path.startsWith("/home")) { // returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!")); - returnCode = PATH_NOT_IN_HOME; - return false; - } +// returnCode = PATH_NOT_IN_HOME; +// return false; +// } //why QSetting's key can't start with "/"?? QString pathKey = path.right(path.length() - 1); From 2d5fae69d670da86d252514f68c9e3387170df45 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Sat, 19 Jun 2021 15:11:31 +0800 Subject: [PATCH 02/20] Update changelog. --- debian/changelog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/debian/changelog b/debian/changelog index 1a17c87..306ca47 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +ukui-search (0.4.0+0619) v101; urgency=medium + + * Bug 无 + * 需求6732,6733,6734,6938 + * 其他改动: + * Add inotify events queue for merging events,reduce disk io operations. + - 增加inotify信号合并缓冲队列,减少90%以上磁盘io操作。 + + -- zhangpengfei Sat, 19 Jun 2021 09:12:10 +0800 + ukui-search (0.4.0+0612) v101; urgency=medium * Bug 无 From bbf27b9d5fbe740cb7b46e4b9bb4d5cdca141e43 Mon Sep 17 00:00:00 2001 From: rookie-J Date: Wed, 23 Jun 2021 15:50:19 +0800 Subject: [PATCH 03/20] =?UTF-8?q?Optimization=20of=20IDF=20dictionary=20lo?= =?UTF-8?q?ading=20mode=EF=BC=9B=20Limit=20the=20maximum=20number=20of=20w?= =?UTF-8?q?ords=20segmentation;=20Other=20optimization;?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libchinese-segmentation/cppjieba/DatTrie.hpp | 114 ++++++++++++++- libchinese-segmentation/cppjieba/DictTrie.hpp | 2 +- .../cppjieba/HMMSegment.hpp | 8 +- libchinese-segmentation/cppjieba/IdfTrie.hpp | 134 ++++++++++++++++++ libchinese-segmentation/cppjieba/Jieba.hpp | 2 +- .../cppjieba/KeywordExtractor.hpp | 60 ++------ .../cppjieba/MixSegment.hpp | 5 +- .../cppjieba/PreFilter.hpp | 7 +- libchinese-segmentation/cppjieba/Unicode.hpp | 18 --- libchinese-segmentation/cppjieba/cppjieba.pri | 2 +- .../libchinese-segmentation.pro | 2 + libsearch/index/construct-document.cpp | 11 +- libsearch/index/document.cpp | 20 ++- libsearch/index/document.h | 7 +- libsearch/index/first-index.cpp | 2 +- libsearch/index/index-generator.cpp | 6 +- libsearch/libsearch.pro | 2 +- src/src.pro | 2 +- ukui-search.pro | 1 + 19 files changed, 309 insertions(+), 96 deletions(-) create mode 100644 libchinese-segmentation/cppjieba/IdfTrie.hpp diff --git a/libchinese-segmentation/cppjieba/DatTrie.hpp b/libchinese-segmentation/cppjieba/DatTrie.hpp index d4e64d1..a4967a3 100644 --- a/libchinese-segmentation/cppjieba/DatTrie.hpp +++ b/libchinese-segmentation/cppjieba/DatTrie.hpp @@ -33,6 +33,19 @@ struct DatElement { } }; +struct IdfElement { + string word; + double idf = 0; + + bool operator < (const IdfElement & b) const { + if (word == b.word) { + return this->idf > b.idf; + } + + return this->word < b.word; + } +}; + inline std::ostream & operator << (std::ostream& os, const DatElement & elem) { return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight; } @@ -91,13 +104,24 @@ public: JiebaDAT::result_pair_type find_result; dat_.exactMatchSearch(key.c_str(), find_result); - if ((0 == find_result.length) || (find_result.value < 0) || (find_result.value >= elements_num_)) { + if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) { return nullptr; } return &elements_ptr_[ find_result.value ]; } + const double Find(const string & key, std::size_t length, std::size_t node_pos) const { + JiebaDAT::result_pair_type find_result; + dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos); + + if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) { + return -1; + } + + return idf_elements_ptr_[ find_result.value ]; + } + void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector&res, size_t max_word_len) const { @@ -119,7 +143,7 @@ public: for (std::size_t idx = 0; idx < num_results; ++idx) { auto & match = result_pairs[idx]; - if ((match.value < 0) || (match.value >= elements_num_)) { + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { continue; } @@ -156,6 +180,11 @@ public: return InitAttachDat(dat_cache_file, md5); } + bool InitBuildDat(vector& elements, const string & dat_cache_file, const string & md5) { + BuildDatCache(elements, dat_cache_file, md5); + return InitIdfAttachDat(dat_cache_file, md5); + } + bool InitAttachDat(const string & dat_cache_file, const string & md5) { mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY); @@ -187,6 +216,37 @@ public: return true; } + bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) { + mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY); + + if (mmap_fd_ < 0) { + return false; + } + + const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END); + assert(seek_off >= 0); + mmap_length_ = seek_off; + + mmap_addr_ = reinterpret_cast(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0)); + assert(MAP_FAILED != mmap_addr_); + + assert(mmap_length_ >= sizeof(CacheFileHeader)); + CacheFileHeader & header = *reinterpret_cast(mmap_addr_); + elements_num_ = header.elements_num; + min_weight_ = header.min_weight; + assert(sizeof(header.md5_hex) == md5.size()); + + if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) { + return false; + } + + assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size()); + idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header)); + const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_; + dat_.set_array(dat_ptr, header.dat_size); + return true; + } + private: void BuildDatCache(vector& elements, const string & dat_cache_file, const string & md5) { std::sort(elements.begin(), elements.end()); @@ -240,12 +300,62 @@ private: } } + void BuildDatCache(vector& elements, const string & dat_cache_file, const string & md5) { + std::sort(elements.begin(), elements.end()); + + vector keys_ptr_vec; + vector values_vec; + vector mem_elem_vec; + + keys_ptr_vec.reserve(elements.size()); + values_vec.reserve(elements.size()); + mem_elem_vec.reserve(elements.size()); + + CacheFileHeader header; + header.min_weight = min_weight_; + assert(sizeof(header.md5_hex) == md5.size()); + memcpy(&header.md5_hex[0], md5.c_str(), md5.size()); + + for (size_t i = 0; i < elements.size(); ++i) { + keys_ptr_vec.push_back(elements[i].word.data()); + values_vec.push_back(i); + mem_elem_vec.push_back(elements[i].idf); + } + + auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]); + assert(0 == ret); + header.elements_num = mem_elem_vec.size(); + header.dat_size = dat_.size(); + + { + string tmp_filepath = string(dat_cache_file) + "_XXXXXX"; + ::umask(S_IWGRP | S_IWOTH); + //const int fd =::mkstemp(&tmp_filepath[0]); + //原mkstemp用法有误,已修复--jxx20210519 + const int fd =::mkstemp((char *)tmp_filepath.data()); + qDebug() << "mkstemp error:" << errno << tmp_filepath.data(); + assert(fd >= 0); + ::fchmod(fd, 0644); + + auto write_bytes = ::write(fd, (const char *)&header, sizeof(header)); + write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(double) * mem_elem_vec.size()); + write_bytes += ::write(fd, dat_.array(), dat_.total_size()); + + assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size()); + ::close(fd); + + const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str()); + assert(0 == rename_ret); + } + } + DatTrie(const DatTrie &); DatTrie &operator=(const DatTrie &); private: JiebaDAT dat_; const DatMemElem * elements_ptr_ = nullptr; + const double * idf_elements_ptr_= nullptr; size_t elements_num_ = 0; double min_weight_ = 0; diff --git a/libchinese-segmentation/cppjieba/DictTrie.hpp b/libchinese-segmentation/cppjieba/DictTrie.hpp index 698f5d8..5ecee54 100644 --- a/libchinese-segmentation/cppjieba/DictTrie.hpp +++ b/libchinese-segmentation/cppjieba/DictTrie.hpp @@ -130,7 +130,7 @@ private: dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache"; } QString path = QString::fromStdString(dat_cache_path); - qDebug() << "#########path:" << path; + qDebug() << "#########Dict path:" << path; if (dat_.InitAttachDat(dat_cache_path, md5)) { LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_; total_dict_size_ = file_size_sum; diff --git a/libchinese-segmentation/cppjieba/HMMSegment.hpp b/libchinese-segmentation/cppjieba/HMMSegment.hpp index 1a9937b..30af449 100644 --- a/libchinese-segmentation/cppjieba/HMMSegment.hpp +++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp @@ -138,10 +138,10 @@ private: size_t now, old, stat; double tmp, endE, endS; - //vector path(XYSize); - //vector weight(XYSize); - int path[XYSize]; - double weight[XYSize]; + vector path(XYSize); + vector weight(XYSize); + //int path[XYSize]; + //double weight[XYSize]; //start for (size_t y = 0; y < Y; y++) { diff --git a/libchinese-segmentation/cppjieba/IdfTrie.hpp b/libchinese-segmentation/cppjieba/IdfTrie.hpp new file mode 100644 index 0000000..b26decf --- /dev/null +++ b/libchinese-segmentation/cppjieba/IdfTrie.hpp @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "limonp/StringUtil.hpp" +#include "limonp/Logging.hpp" +#include "Unicode.hpp" +#include "DatTrie.hpp" +#include +namespace cppjieba { + +using namespace limonp; + +const size_t IDF_COLUMN_NUM = 2; + +class IdfTrie { +public: + enum UserWordWeightOption { + WordWeightMin, + WordWeightMedian, + WordWeightMax, + }; // enum UserWordWeightOption + + IdfTrie(const string& dict_path, const string & dat_cache_path = "", + UserWordWeightOption user_word_weight_opt = WordWeightMedian) { + Init(dict_path, dat_cache_path, user_word_weight_opt); + } + + ~IdfTrie() {} + + double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const { + return dat_.Find(word, length, node_pos); + } + + void Find(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector&res, + size_t max_word_len = MAX_WORD_LENGTH) const { + dat_.Find(begin, end, res, max_word_len); + } + + bool IsUserDictSingleChineseWord(const Rune& word) const { + return IsIn(user_dict_single_chinese_word_, word); + } + + double GetMinWeight() const { + return dat_.GetMinWeight(); + } + + size_t GetTotalDictSize() const { + return total_dict_size_; + } + +private: + void Init(const string& dict_path, string dat_cache_path, + UserWordWeightOption user_word_weight_opt) { + size_t file_size_sum = 0; + const string md5 = CalcFileListMD5(dict_path, file_size_sum); + + if (dat_cache_path.empty()) { + //未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519 + dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache"; + } + QString path = QString::fromStdString(dat_cache_path); + qDebug() << "#########Idf path:" << path; + if (dat_.InitIdfAttachDat(dat_cache_path, md5)) { + total_dict_size_ = file_size_sum; + return; + } + + LoadDefaultIdf(dict_path); + double idf_sum_ = CalcIdfSum(static_node_infos_); + assert(static_node_infos_.size()); + idfAverage_ = idf_sum_ / static_node_infos_.size(); + assert(idfAverage_ > 0.0); + double min_weight = 0; + dat_.SetMinWeight(min_weight); + + const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5); + assert(build_ret); + total_dict_size_ = file_size_sum; + vector().swap(static_node_infos_); + } + + void LoadDefaultIdf(const string& filePath) { + ifstream ifs(filePath.c_str()); + if(not ifs.is_open()){ + return ; + } + XCHECK(ifs.is_open()) << "open " << filePath << " failed."; + string line; + vector buf; + size_t lineno = 0; + + for (; getline(ifs, line); lineno++) { + if (line.empty()) { + XLOG(ERROR) << "lineno: " << lineno << " empty. skipped."; + continue; + } + Split(line, buf, " "); + XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line; + IdfElement node_info; + node_info.word = buf[0]; + node_info.idf = atof(buf[1].c_str()); + static_node_infos_.push_back(node_info); + } + } + + double CalcIdfSum(const vector& node_infos) const { + double sum = 0.0; + + for (size_t i = 0; i < node_infos.size(); i++) { + sum += node_infos[i].idf; + } + + return sum; + } +public: + double idfAverage_; +private: + vector static_node_infos_; + size_t total_dict_size_ = 0; + DatTrie dat_; + unordered_set user_dict_single_chinese_word_; +}; +} + diff --git a/libchinese-segmentation/cppjieba/Jieba.hpp b/libchinese-segmentation/cppjieba/Jieba.hpp index c017bd6..a7b11b3 100644 --- a/libchinese-segmentation/cppjieba/Jieba.hpp +++ b/libchinese-segmentation/cppjieba/Jieba.hpp @@ -21,7 +21,7 @@ public: mix_seg_(&dict_trie_, &model_, stopWordPath), full_seg_(&dict_trie_), query_seg_(&dict_trie_, &model_, stopWordPath), - extractor(&dict_trie_, &model_, idfPath, stopWordPath){ } + extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ } ~Jieba() { } void Cut(const string& sentence, vector& words, bool hmm = true) const { diff --git a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp index f87ad5f..0011e93 100644 --- a/libchinese-segmentation/cppjieba/KeywordExtractor.hpp +++ b/libchinese-segmentation/cppjieba/KeywordExtractor.hpp @@ -2,6 +2,7 @@ #include #include "MixSegment.hpp" +#include "IdfTrie.hpp" namespace cppjieba { @@ -11,18 +12,14 @@ using namespace std; /*utf8*/ class KeywordExtractor { public: -// struct Word { -// string word; -// vector offsets; -// double weight; -// }; // struct Word KeywordExtractor(const DictTrie* dictTrie, const HMMModel* model, const string& idfPath, + const string& dat_cache_path, const string& stopWordPath) - : segment_(dictTrie, model, stopWordPath) { - LoadIdfDict(idfPath); + : segment_(dictTrie, model, stopWordPath), + idf_trie_(idfPath,dat_cache_path){ } ~KeywordExtractor() { } @@ -63,12 +60,11 @@ public: keywords.reserve(wordmap.size()); for (unordered_map::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { - unordered_map::const_iterator cit = idfMap_.find(itr->first);//IDF词典查找 - - if (cit != idfMap_.end()) { - itr->second.weight *= cit->second; + double idf = idf_trie_.Find(itr->first); + if (-1 != idf) {//IDF词典查找 + itr->second.weight *= idf; } else { - itr->second.weight *= idfAverage_; + itr->second.weight *= idf_trie_.idfAverage_; } itr->second.word = itr->first; @@ -80,51 +76,13 @@ public: keywords.resize(topN); } private: - void LoadIdfDict(const string& idfPath) { - ifstream ifs(idfPath.c_str()); - if(not ifs.is_open()){ - return ; - } - XCHECK(ifs.is_open()) << "open " << idfPath << " failed"; - string line ; - vector buf; - double idf = 0.0; - double idfSum = 0.0; - size_t lineno = 0; - - for (; getline(ifs, line); lineno++) { - buf.clear(); - - if (line.empty()) { - XLOG(ERROR) << "lineno: " << lineno << " empty. skipped."; - continue; - } - - Split(line, buf, " "); - - if (buf.size() != 2) { - XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped."; - continue; - } - - idf = atof(buf[1].c_str()); - idfMap_[buf[0]] = idf; - idfSum += idf; - - } - - assert(lineno); - idfAverage_ = idfSum / lineno; - assert(idfAverage_ > 0.0); - } static bool Compare(const KeyWord& lhs, const KeyWord& rhs) { return lhs.weight > rhs.weight; } MixSegment segment_; - unordered_map idfMap_; - double idfAverage_; + IdfTrie idf_trie_; unordered_set symbols_; }; // class KeywordExtractor diff --git a/libchinese-segmentation/cppjieba/MixSegment.hpp b/libchinese-segmentation/cppjieba/MixSegment.hpp index 4c93748..a539039 100644 --- a/libchinese-segmentation/cppjieba/MixSegment.hpp +++ b/libchinese-segmentation/cppjieba/MixSegment.hpp @@ -156,8 +156,9 @@ public: // if mp Get a single one and it is not in userdict, collect it in sequence size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符 - while (j < (words.size() - 1) && words[j].left == words[j].right && - !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { + while (j < (words.size() - 1) + && words[j].left == words[j].right + && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { j++; } diff --git a/libchinese-segmentation/cppjieba/PreFilter.hpp b/libchinese-segmentation/cppjieba/PreFilter.hpp index 1a75a57..3f04dcf 100644 --- a/libchinese-segmentation/cppjieba/PreFilter.hpp +++ b/libchinese-segmentation/cppjieba/PreFilter.hpp @@ -71,7 +71,7 @@ public: cursor_ ++; } } - + int num = 0; while (cursor_ != sentence_.end()) { if (cursor_->rune == 0x20) { if (wordRange.left == cursor_) { @@ -83,6 +83,11 @@ public: } cursor_ ++; + num++; + if (num >= 1024) { //todo 防止一次性传入过多字节,暂定限制为1024个字 + wordRange.right = cursor_; + return true; + } } wordRange.right = sentence_.end(); diff --git a/libchinese-segmentation/cppjieba/Unicode.hpp b/libchinese-segmentation/cppjieba/Unicode.hpp index d77b5dd..360b461 100644 --- a/libchinese-segmentation/cppjieba/Unicode.hpp +++ b/libchinese-segmentation/cppjieba/Unicode.hpp @@ -97,24 +97,6 @@ inline RuneArray DecodeRunesInString(const string& s) { //重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518 inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) { -/* - RuneArray arr; - - if (not DecodeRunesInString(s, arr)) { - return false; - } - - runes.clear(); - - uint32_t offset = 0; - - for (uint32_t i = 0; i < arr.size(); ++i) { - const uint32_t len = limonp::UnicodeToUtf8Bytes(arr[i]); - RuneInfo x(arr[i], offset, len, i, 1); - runes.push_back(x); - offset += len; - } -*/ uint32_t tmp; uint32_t offset = 0; diff --git a/libchinese-segmentation/cppjieba/cppjieba.pri b/libchinese-segmentation/cppjieba/cppjieba.pri index fd783c4..cec0ba9 100644 --- a/libchinese-segmentation/cppjieba/cppjieba.pri +++ b/libchinese-segmentation/cppjieba/cppjieba.pri @@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD HEADERS += \ $$PWD/DictTrie.hpp \ + $$PWD/IdfTrie.hpp \ $$PWD/FullSegment.hpp \ $$PWD/HMMModel.hpp \ $$PWD/HMMSegment.hpp \ @@ -17,5 +18,4 @@ HEADERS += \ $$PWD/TextRankExtractor.hpp \ $$PWD/Trie.hpp \ $$PWD/Unicode.hpp - include(limonp/limonp.pri) diff --git a/libchinese-segmentation/libchinese-segmentation.pro b/libchinese-segmentation/libchinese-segmentation.pro index 583f794..28fb1a1 100644 --- a/libchinese-segmentation/libchinese-segmentation.pro +++ b/libchinese-segmentation/libchinese-segmentation.pro @@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 include(cppjieba/cppjieba.pri) +#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc + SOURCES += \ chinese-segmentation.cpp \ diff --git a/libsearch/index/construct-document.cpp b/libsearch/index/construct-document.cpp index b96ff2f..445bdb5 100644 --- a/libsearch/index/construct-document.cpp +++ b/libsearch/index/construct-document.cpp @@ -108,12 +108,14 @@ void ConstructDocumentForContent::run() { FileReader::getTextContent(m_path, content); if(content.isEmpty()) return; - QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); - QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); + //QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); + //QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); Document doc; doc.setData(content); - doc.setUniqueTerm(uniqueterm); - doc.addTerm(upTerm); + //doc.setUniqueTerm(uniqueterm); + doc.setUniqueTerm(FileUtils::makeDocUterm(m_path)); + //doc.addTerm(upTerm); + doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); doc.addValue(m_path); //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. @@ -131,6 +133,7 @@ void ConstructDocumentForContent::run() { IndexGenerator::_mutex_doc_list_content.unlock(); content.clear(); content.squeeze(); + term.clear(); term.shrink_to_fit(); return; diff --git a/libsearch/index/document.cpp b/libsearch/index/document.cpp index 5f71336..57f907a 100644 --- a/libsearch/index/document.cpp +++ b/libsearch/index/document.cpp @@ -37,7 +37,7 @@ void Document::addPosting(std::string term, QVector offset, int weight) } } -void Document::addPosting(std::string term, std::vector offset, int weight) { +void Document::addPosting(std::string &term, std::vector &offset, int weight) { if(term == "") return; if(term.length() > 240) @@ -63,6 +63,12 @@ void Document::addTerm(QString term) { m_document.add_term(term.toStdString()); } +void Document::addTerm(std::string term) { + if(term.empty()) + return; + m_document.add_term(term); +} + void Document::addValue(QString value) { m_document.add_value(1, value.toStdString()); } @@ -73,12 +79,20 @@ void Document::setUniqueTerm(QString term) { m_document.add_term(term.toStdString()); // m_unique_term = new QString(term); - m_unique_term = std::move(term); + m_unique_term = std::move(term.toStdString()); } + +void Document::setUniqueTerm(std::string term) { + if(term.empty()) + return; + m_document.add_term(term); + m_unique_term = term; +} + std::string Document::getUniqueTerm() { // qDebug()<<"m_unique_term!"<<*m_unique_term; // qDebug() << QString::fromStdString(m_unique_term.toStdString()); - return m_unique_term.toStdString(); + return m_unique_term;//.toStdString(); } void Document::setIndexText(QStringList indexText) { diff --git a/libsearch/index/document.h b/libsearch/index/document.h index 6025bc8..84e6262 100644 --- a/libsearch/index/document.h +++ b/libsearch/index/document.h @@ -41,11 +41,13 @@ public: } void setData(QString &data); void addPosting(std::string term, QVector offset, int weight = 1); - void addPosting(std::string term, std::vector offset, int weight = 1); + void addPosting(std::string &term, std::vector &offset, int weight = 1); void addPosting(std::string term, unsigned int offset, int weight = 1); void addTerm(QString term); + void addTerm(std::string term); void addValue(QString value); void setUniqueTerm(QString term); + void setUniqueTerm(std::string term); std::string getUniqueTerm(); void setIndexText(QStringList indexText); QStringList getIndexText(); @@ -53,7 +55,8 @@ public: private: Xapian::Document m_document; QStringList m_index_text; - QString m_unique_term; + //QString m_unique_term; + std::string m_unique_term; }; } diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp index d23ba88..fb3ef49 100644 --- a/libsearch/index/first-index.cpp +++ b/libsearch/index/first-index.cpp @@ -154,7 +154,6 @@ void FirstIndex::run() { ++FileUtils::_index_status; - pid_t pid; pid = fork(); if(pid == 0) { @@ -235,6 +234,7 @@ void FirstIndex::run() { qDebug() << "content index end;"; sem.release(2); }); + mutex1.lock(); mutex2.lock(); mutex3.lock(); diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index 8f57d4b..06efc6e 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -29,7 +29,7 @@ #include "index-generator.h" #include "chinese-segmentation.h" #include - +#include #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() @@ -127,11 +127,11 @@ bool IndexGenerator::creatAllIndex(QQueue *messageList) { // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); // FileUtils::_index_status &= ~0x2; qDebug() << "finish creatAllIndex for content"; + IndexGenerator::_doc_list_content.clear(); IndexGenerator::_doc_list_content.squeeze(); QVector().swap(IndexGenerator::_doc_list_content); -// delete _doc_list_content; -// _doc_list_content = nullptr; + malloc_trim(0); } Q_EMIT this->transactionFinished(); return true; diff --git a/libsearch/libsearch.pro b/libsearch/libsearch.pro index 8ffed23..38cbb89 100644 --- a/libsearch/libsearch.pro +++ b/libsearch/libsearch.pro @@ -33,7 +33,7 @@ include(plugininterface/plugin-interface.pri) include(pluginmanage/plugin-manager.pri) LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation -LIBS += -lxapian -lquazip5 -luchardet +LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc SOURCES += \ file-utils.cpp \ diff --git a/src/src.pro b/src/src.pro index e207640..ff309e5 100644 --- a/src/src.pro +++ b/src/src.pro @@ -9,7 +9,7 @@ TEMPLATE = app PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 CONFIG += c++11 link_pkgconfig no_keywords lrelease LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11 -LIBS += -lukui-log4qt +#LIBS += -lukui-log4qt -L/usr/local/lib/libjemalloc -ljemalloc # The following define makes your compiler emit warnings if you use # any Qt feature that has been marked deprecated (the exact warnings # depend on your compiler). Please consult the documentation of the diff --git a/ukui-search.pro b/ukui-search.pro index 47447cd..7fd587d 100644 --- a/ukui-search.pro +++ b/ukui-search.pro @@ -19,3 +19,4 @@ src.depends = libsearch CONFIG += ordered + From d3181a36f28a4c748196ec7123e6a04a4a02da69 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Fri, 25 Jun 2021 16:30:46 +0800 Subject: [PATCH 04/20] Close inotify fd after closed file index service. --- libsearch/file-utils.cpp | 16 ++++++++++++---- libsearch/index/inotify-watch.cpp | 7 ++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index c6fdacd..f6f8e50 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -483,8 +483,10 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) { if(!file.open(QuaZip::mdUnzip)) return; - if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) + if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) { + file.close(); return; + } QuaZipFile fileR(&file); fileR.open(QIODevice::ReadOnly); //读取方式打开 @@ -545,8 +547,10 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) { if(i.startsWith(prefix)) fileList << i; } - if(fileList.isEmpty()) + if(fileList.isEmpty()) { + file.close(); return; + } for(int i = 0; i < fileList.size(); ++i){ QString name = prefix + QString::number(i + 1) + ".xml"; @@ -650,8 +654,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { if(!file.open(QuaZip::mdUnzip)) return; - if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) + if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) { + file.close(); return; + } QuaZipFile fileR(&file); fileR.open(QIODevice::ReadOnly); @@ -706,8 +712,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) { void FileUtils::getPdfTextContent(QString &path, QString &textcontent) { Poppler::Document *doc = Poppler::Document::load(path); - if(doc->isLocked()) + if(doc->isLocked()) { + delete doc; return; + } const QRectF qf; int pageNum = doc->numPages(); for(int i = 0; i < pageNum; ++i) { diff --git a/libsearch/index/inotify-watch.cpp b/libsearch/index/inotify-watch.cpp index 9ffdc5f..3cd7f6b 100644 --- a/libsearch/index/inotify-watch.cpp +++ b/libsearch/index/inotify-watch.cpp @@ -1,6 +1,7 @@ #include "inotify-watch.h" #include #include +#include using namespace Zeeker; static InotifyWatch* global_instance_InotifyWatch = nullptr; @@ -65,7 +66,7 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase) // qDebug() << i.value(); if(i.value().length() > path.length()) { if(i.value().startsWith(path)) { - qDebug() << "remove path: " << i.value(); +// qDebug() << "remove path: " << i.value(); inotify_rm_watch(m_inotifyFd, currentPath.key(path)); currentPath.erase(i++); } else { @@ -132,6 +133,8 @@ void InotifyWatch::run() if (m_inotifyFd > 0) { qDebug()<<"Inotify init success!"; } else { + printf("errno=%d\n",errno); + printf("Mesg:%s\n",strerror(errno)); Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); } @@ -205,10 +208,12 @@ void InotifyWatch::run() assert(false); } } + qDebug() << "Leave watch loop"; if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false); } + close(m_inotifyFd); // fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC); // m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read); // connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection); From 455f87b851127696bb562c19bda0dddeba6c092f Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Sat, 26 Jun 2021 14:07:28 +0800 Subject: [PATCH 05/20] [Fix] Tray icon click won't work after Win+D. --- src/mainwindow.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mainwindow.cpp b/src/mainwindow.cpp index 2c593da..cf80e43 100644 --- a/src/mainwindow.cpp +++ b/src/mainwindow.cpp @@ -129,6 +129,8 @@ MainWindow::MainWindow(QWidget *parent) : this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入 this->raise(); this->activateWindow(); + } else if(this->isVisible()&&!this->isActiveWindow()) { + this->activateWindow(); } else { tryHideMainwindow(); } From 4b02fdbb34d2df2954420e35ce8fee59daa45bc6 Mon Sep 17 00:00:00 2001 From: rookie-J Date: Mon, 28 Jun 2021 19:38:27 +0800 Subject: [PATCH 06/20] Fix a bug for app match; --- libsearch/appsearch/app-match.cpp | 41 ++++++++++++++++++++++++++++--- libsearch/appsearch/app-match.h | 1 + 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/libsearch/appsearch/app-match.cpp b/libsearch/appsearch/app-match.cpp index aca6503..4668447 100644 --- a/libsearch/appsearch/app-match.cpp +++ b/libsearch/appsearch/app-match.cpp @@ -228,10 +228,11 @@ void AppMatch::getDesktopFilePath() { } void AppMatch::getAppName(QMap &installed) { - QMap::const_iterator i; - for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { - appNameMatch(i.key().app_name, installed); - } +// QMap::const_iterator i; +// for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { +// appNameMatch(i.key().app_name, installed); +// } + appNameMatch(installed); qDebug() << "installed app match is successful!"; } @@ -276,6 +277,38 @@ void AppMatch::appNameMatch(QString appname, QMap &inst } } } +void AppMatch::appNameMatch(QMap &installed) { + QStringList list; + NameString name; + QMapIterator iter(m_installAppMap); + while(iter.hasNext()) { + iter.next(); + list = iter.value(); + name.app_name = iter.key().app_name; + if(iter.key().app_name.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + continue; + } + + QStringList pinyinlist; + pinyinlist = FileUtils::findMultiToneWords(iter.key().app_name); + + for(int i = 0; i < pinyinlist.size() / 2; i++) { + QString shouzimu = pinyinlist.at(2 * i + 1); // 中文转首字母 + if(shouzimu.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + continue; + } + if(m_sourceText.size() < 2) + continue; + QString pinyin = pinyinlist.at(2 * i); // 中文转拼音 + if(pinyin.contains(m_sourceText, Qt::CaseInsensitive)) { + installed.insert(name, list); + continue; + } + } + } +} void AppMatch::softWareCenterSearch(QMap &softwarereturn) { // if(m_interFace->timeout() != -1) { diff --git a/libsearch/appsearch/app-match.h b/libsearch/appsearch/app-match.h index 253992e..6f51d15 100644 --- a/libsearch/appsearch/app-match.h +++ b/libsearch/appsearch/app-match.h @@ -65,6 +65,7 @@ private: void getAppName(QMap &installed); // void appNameMatch(QString appname,QString desktoppath,QString appicon); void appNameMatch(QString appname, QMap &installed); + void appNameMatch(QMap &installed); void softWareCenterSearch(QMap &softwarereturn); From 3a3d05a468fc95a4f0015d4740864f9440adf0b4 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Tue, 29 Jun 2021 11:32:20 +0800 Subject: [PATCH 07/20] Update changelog. --- debian/changelog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/debian/changelog b/debian/changelog index 306ca47..5180890 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,23 @@ +ukui-search (0.4.0+0629) v101; urgency=medium + + * Bug 无 + * 任务号:无 + * 其他改动: + * Fix:App search error when display applications of the same name. + - 修复了当存在重名应用时应用搜索显示错误的问题。 + + -- zhangpengfei Tue, 29 Jun 2021 11:19:25 +0800 + +ukui-search (0.4.0+0628) v101; urgency=medium + + * Bug 无 + * 任务号:41543 + * 其他改动: + * Fix: Tray icon click won't work after Win+D. + -修复了在弹出建立索引提示弹窗后按WIN+D之后,点击任务栏托盘无法呼出页面的问题。 + + -- zhangpengfei Mon, 28 Jun 2021 09:35:15 +0800 + ukui-search (0.4.0+0619) v101; urgency=medium * Bug 无 From e3ffea6ea6c88a3b47222350d96b5f52ac85a3c7 Mon Sep 17 00:00:00 2001 From: jixiaoxu Date: Wed, 30 Jun 2021 09:03:00 +0800 Subject: [PATCH 08/20] Fix a bug(break->continue) --- libsearch/appsearch/app-match.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libsearch/appsearch/app-match.cpp b/libsearch/appsearch/app-match.cpp index 4668447..8ad8d31 100644 --- a/libsearch/appsearch/app-match.cpp +++ b/libsearch/appsearch/app-match.cpp @@ -297,14 +297,14 @@ void AppMatch::appNameMatch(QMap &installed) { QString shouzimu = pinyinlist.at(2 * i + 1); // 中文转首字母 if(shouzimu.contains(m_sourceText, Qt::CaseInsensitive)) { installed.insert(name, list); - continue; + break; } if(m_sourceText.size() < 2) - continue; + break; QString pinyin = pinyinlist.at(2 * i); // 中文转拼音 if(pinyin.contains(m_sourceText, Qt::CaseInsensitive)) { installed.insert(name, list); - continue; + break; } } } From 2daf23f7bd3c45e62396b8064d1afacaeb906f0b Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Wed, 30 Jun 2021 10:09:25 +0800 Subject: [PATCH 09/20] Add a systembus iface for add inotify_max_user_instance, avoid inotify_init fail. --- libsearch/index/inotify-watch.cpp | 14 ++++++++--- libsearch/index/ukui-search-qdbus.cpp | 12 ++++++++- libsearch/index/ukui-search-qdbus.h | 2 ++ ukuisearch-systemdbus/sysdbusregister.cpp | 30 +++++++++++++++++++++++ ukuisearch-systemdbus/sysdbusregister.h | 1 + 5 files changed, 55 insertions(+), 4 deletions(-) diff --git a/libsearch/index/inotify-watch.cpp b/libsearch/index/inotify-watch.cpp index 3cd7f6b..ea59b19 100644 --- a/libsearch/index/inotify-watch.cpp +++ b/libsearch/index/inotify-watch.cpp @@ -133,9 +133,17 @@ void InotifyWatch::run() if (m_inotifyFd > 0) { qDebug()<<"Inotify init success!"; } else { - printf("errno=%d\n",errno); - printf("Mesg:%s\n",strerror(errno)); - Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); + qWarning() << "Inotify init fail! Now try add inotify_user_instances."; + UkuiSearchQDBus usQDBus; + usQDBus.addInotifyUserInstances(128); + m_inotifyFd = inotify_init(); + if (m_inotifyFd > 0) { + qDebug()<<"Inotify init success!"; + } else { + printf("errno=%d\n",errno); + printf("Mesg:%s\n",strerror(errno)); + Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); + } } this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation)); diff --git a/libsearch/index/ukui-search-qdbus.cpp b/libsearch/index/ukui-search-qdbus.cpp index b03f340..b39f4da 100644 --- a/libsearch/index/ukui-search-qdbus.cpp +++ b/libsearch/index/ukui-search-qdbus.cpp @@ -42,5 +42,15 @@ void UkuiSearchQDBus::setInotifyMaxUserWatches() { // sysctl this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2"); // /etc/sysctl.conf -// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); + // this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); +} + +int UkuiSearchQDBus::addInotifyUserInstances(int addNum) +{ + QDBusReply reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum); + if(reply.isValid()) { + qDebug() << "Set inotify_max_user_instances to" << reply.value(); + } else { + qWarning() << "Call AddInotifyMaxUserInstance failed!"; + } } diff --git a/libsearch/index/ukui-search-qdbus.h b/libsearch/index/ukui-search-qdbus.h index d316171..898dc53 100644 --- a/libsearch/index/ukui-search-qdbus.h +++ b/libsearch/index/ukui-search-qdbus.h @@ -21,12 +21,14 @@ #define UKUISEARCHQDBUS_H #include +#include namespace Zeeker { class UkuiSearchQDBus { public: UkuiSearchQDBus(); ~UkuiSearchQDBus(); void setInotifyMaxUserWatches(); + int addInotifyUserInstances(int addNum); private: QDBusInterface* tmpSystemQDBusInterface; }; diff --git a/ukuisearch-systemdbus/sysdbusregister.cpp b/ukuisearch-systemdbus/sysdbusregister.cpp index 194e59f..9303d25 100644 --- a/ukuisearch-systemdbus/sysdbusregister.cpp +++ b/ukuisearch-systemdbus/sysdbusregister.cpp @@ -102,6 +102,36 @@ QString SysdbusRegister::setInotifyMaxUserWatchesStep3() { return QString(ba); } +int SysdbusRegister::AddInotifyMaxUserInstance(int addNum) +{ + QFile file("/proc/sys/fs/inotify/max_user_instances"); + if(!file.open(QIODevice::ReadOnly | QIODevice::Text)) + return -1; + QTextStream ts(&file); + QString s = ts.read(512); + int instances = s.toInt() + addNum; + + QByteArray ba; + FILE * fp = NULL; + char cmd[128]; + char buf[1024]; + sprintf(cmd, "sysctl -w fs.inotify.max_user_instances=\"%d\"", instances); + if((fp = popen(cmd, "r")) != NULL) { + rewind(fp); + while(!feof(fp)) { + fgets(buf, sizeof(buf), fp); + ba.append(buf); + } + pclose(fp); + fp = NULL; + } else { + qWarning() << "popen open failed"; + return -1; + } + return instances; + +} + //The following example comes from control center //void SysdbusRegister::setAutoLoginStatus(QString username) { diff --git a/ukuisearch-systemdbus/sysdbusregister.h b/ukuisearch-systemdbus/sysdbusregister.h index caa9bc7..a52d75d 100644 --- a/ukuisearch-systemdbus/sysdbusregister.h +++ b/ukuisearch-systemdbus/sysdbusregister.h @@ -52,6 +52,7 @@ public slots: Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3(); + Q_SCRIPTABLE int AddInotifyMaxUserInstance(int addNum); // // 设置免密登录状态 // Q_SCRIPTABLE void setNoPwdLoginStatus(); From eaafe8f993f12f0ecfbe092f86a1b2b9127bb103 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Wed, 30 Jun 2021 10:24:25 +0800 Subject: [PATCH 10/20] [Fix] Detail page display incorrectly occasionally. --- src/content-widget.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/content-widget.cpp b/src/content-widget.cpp index 5ec2370..0fdcee8 100644 --- a/src/content-widget.cpp +++ b/src/content-widget.cpp @@ -730,7 +730,7 @@ void ContentWidget::onListViewRowChanged(SearchListView * listview, const int &t if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) { m_detailView->isContent = true; m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword); - } else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && listview->currentIndex().row() == listview->getLength() - 1) { + } else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && SearchItem::SearchType::Contents == m_bestList.at(listview->currentIndex().row()).first) { m_detailView->setContent(m_bestContent, m_keyword); m_detailView->isContent = true; m_detailView->setupWidget(SearchItem::SearchType::Contents, path); From 8ad678302ee59a8e8a4513542ce91fd13ef184ea Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Wed, 30 Jun 2021 14:31:50 +0800 Subject: [PATCH 11/20] Update changelog ukui-search0.4.0+0630. --- debian/changelog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/debian/changelog b/debian/changelog index 5180890..a889059 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,15 @@ +ukui-search (0.4.0+0630) v101; urgency=medium + + * Bug 无 + * 任务号:无 + * 其他改动: + * Add a systembus iface for add inotify_max_user_instance, avoid inotify_init fail. + - 增加修改inotify_max_user_instance配置的dbus接口,避免由于超出最大数量导致的inotify_init失败问题。 + * Fix: Detail page display incorrectly occasionally. + - 修复了偶现的点击最佳列表,右侧详情显示错误的问题。 + + -- zhangpengfei Wed, 30 Jun 2021 11:38:31 +0800 + ukui-search (0.4.0+0629) v101; urgency=medium * Bug 无 From f9e9ea67ade45b4404674737eede874a083c7709 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Wed, 30 Jun 2021 18:50:11 +0800 Subject: [PATCH 12/20] =?UTF-8?q?Update=20README.md=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c00ecf2..d375118 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,17 @@ # ukui-search -[WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment. +[dWIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment. + +Build from source + + + git clone https://github.com/ukui/ukui-search.git + + cd ukui-search && mkdir build && cd build + + qmake .. && make + + sudo make install + + /usr/bin/ukui-search From 272d707230e536179f6037d06eafe0c760d2d826 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Thu, 1 Jul 2021 19:37:37 +0800 Subject: [PATCH 13/20] [Fix] Creat fifo error sometimes. --- libsearch/index/first-index.cpp | 13 ------------- libsearch/index/searchmethodmanager.cpp | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/libsearch/index/first-index.cpp b/libsearch/index/first-index.cpp index fb3ef49..15f6429 100644 --- a/libsearch/index/first-index.cpp +++ b/libsearch/index/first-index.cpp @@ -97,19 +97,6 @@ void FirstIndex::run() { QTime t1 = QTime::currentTime(); // Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running. - QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search"); - if(!fifoDir.exists()) - qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath()); - - unlink(UKUI_SEARCH_PIPE_PATH); - int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777); - if(retval == -1) { - qCritical() << "creat fifo error!!"; - syslog(LOG_ERR, "creat fifo error!!\n"); - assert(false); - return; - } - qDebug() << "create fifo success\n"; QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString(); QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString(); diff --git a/libsearch/index/searchmethodmanager.cpp b/libsearch/index/searchmethodmanager.cpp index dc073cb..bfdc7ee 100644 --- a/libsearch/index/searchmethodmanager.cpp +++ b/libsearch/index/searchmethodmanager.cpp @@ -13,6 +13,21 @@ void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) { qWarning("enum class error!!!\n"); } if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) { + + // Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running. + QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search"); + if(!fifoDir.exists()) + qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath()); + + unlink(UKUI_SEARCH_PIPE_PATH); + int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777); + if(retval == -1) { + qCritical() << "creat fifo error!!"; + syslog(LOG_ERR, "creat fifo error!!\n"); + assert(false); + return; + } + qDebug() << "create fifo success\n"; qWarning() << "start first index"; m_fi.start(); qWarning() << "start inotify index"; From ae5477b901698b6b0a047ce28c0eb930787f6da6 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Fri, 2 Jul 2021 14:50:42 +0800 Subject: [PATCH 14/20] Update desktop file. --- data/ukui-search-menu.desktop | 1 + 1 file changed, 1 insertion(+) diff --git a/data/ukui-search-menu.desktop b/data/ukui-search-menu.desktop index 07b5690..085b75c 100644 --- a/data/ukui-search-menu.desktop +++ b/data/ukui-search-menu.desktop @@ -9,6 +9,7 @@ Exec=/usr/bin/ukui-search -s Type=Application Icon=kylin-search X-UKUI-AutoRestart=true +NoDisplay=true OnlyShowIn=UKUI X-UKUI-Autostart-Phase=Application Terminal=false From 701fdf783ccf435a53e77964970cdc0db3e32c56 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Sat, 3 Jul 2021 10:50:46 +0800 Subject: [PATCH 15/20] Update changelog ukui-search0.4.0+0703. --- debian/changelog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/debian/changelog b/debian/changelog index a889059..ac33d6d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,15 @@ +ukui-search (0.4.0+0703) v101; urgency=medium + + * Bug 无 + * 任务号:无 + * 其他改动: + * Fix:Creat fifo error sometimes. + - 修复了在开关索引时偶现的由于创建管道失败导致的崩溃问题。 + * Remove entry from ukui-menu. + - 移除了开始菜单入口(开始菜单里的搜索应用显示)。 + + -- zhangpengfei Sat, 03 Jul 2021 10:13:23 +0800 + ukui-search (0.4.0+0630) v101; urgency=medium * Bug 无 From 37fa6214520ac34353b49027446a406756c301ae Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Tue, 6 Jul 2021 16:53:32 +0800 Subject: [PATCH 16/20] [Fix] Path inclusive relation judgment incorrectly. --- libsearch/file-utils.cpp | 19 +++++++++++++++++++ libsearch/file-utils.h | 2 ++ libsearch/global-settings.cpp | 8 ++++++-- libsearch/global-settings.h | 1 + libsearch/index/inotify-watch.cpp | 5 +++-- libsearch/index/pending-file-queue.cpp | 3 ++- libsearch/index/search-manager.cpp | 4 ++-- 7 files changed, 35 insertions(+), 7 deletions(-) diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index f6f8e50..31f1f69 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -178,6 +178,25 @@ QString FileUtils::getSettingName(const QString& setting) { return setting.right(setting.length() - setting.lastIndexOf("/") - 1); } +bool FileUtils::isOrUnder(QString pathA, QString pathB) +{ + if(!pathA.startsWith("/")) + pathA.prepend("/"); + if(!pathB.startsWith("/")) + pathB.prepend("/"); + + if(pathA == pathB) + return true; + + if(pathA.length() > pathB.length()) + return false; + + if(pathA.startsWith(pathB + "/")) + return true; + + return false; +} + void FileUtils::loadHanziTable(const QString &fileName) { QFile file(fileName); diff --git a/libsearch/file-utils.h b/libsearch/file-utils.h index a352d63..f0a9085 100644 --- a/libsearch/file-utils.h +++ b/libsearch/file-utils.h @@ -67,6 +67,8 @@ public: static QString getFileName(const QString &); static QString getAppName(const QString &); static QString getSettingName(const QString &); + //A is or under B + static bool isOrUnder(QString pathA, QString pathB); //chinese character to pinyin static QMap map_chinese2pinyin; diff --git a/libsearch/global-settings.cpp b/libsearch/global-settings.cpp index cba42e5..3aebc89 100644 --- a/libsearch/global-settings.cpp +++ b/libsearch/global-settings.cpp @@ -155,15 +155,19 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem //why QSetting's key can't start with "/"?? QString pathKey = path.right(path.length() - 1); + if (pathKey.endsWith(QLatin1Char('/'))) { + pathKey = pathKey.mid(0, pathKey.length() - 1); + } + QStringList blockDirs = m_block_dirs_settings->allKeys(); for(QString i : blockDirs) { - if(pathKey.startsWith(i)) { + if(FileUtils::isOrUnder(pathKey, i)) { // returnCode = QString(tr("My parent folder has been blocked!")); returnCode = PATH_PARENT_BLOCKED; return false; } - if(i.startsWith(pathKey)) + if(FileUtils::isOrUnder(i, pathKey)) m_block_dirs_settings->remove(i); } m_block_dirs_settings->setValue(pathKey, "0"); diff --git a/libsearch/global-settings.h b/libsearch/global-settings.h index 4a61cc2..de520ab 100644 --- a/libsearch/global-settings.h +++ b/libsearch/global-settings.h @@ -36,6 +36,7 @@ #include #include #include "libsearch_global.h" +#include "file-utils.h" #define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise" #define TRANSPARENCY_KEY "transparency" diff --git a/libsearch/index/inotify-watch.cpp b/libsearch/index/inotify-watch.cpp index ea59b19..eb845a8 100644 --- a/libsearch/index/inotify-watch.cpp +++ b/libsearch/index/inotify-watch.cpp @@ -49,7 +49,7 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase) for(QMap::Iterator i = currentPath.begin(); i != currentPath.end();) { // qDebug() << i.value(); // if(i.value().length() > path.length()) { - if(i.value().startsWith(path)) { + if(FileUtils::isOrUnder(i.value(), path)) { qDebug() << "remove path: " << i.value(); inotify_rm_watch(m_inotifyFd, currentPath.key(path)); PendingFile f(i.value()); @@ -65,7 +65,8 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase) for(QMap::Iterator i = currentPath.begin(); i != currentPath.end();) { // qDebug() << i.value(); if(i.value().length() > path.length()) { - if(i.value().startsWith(path)) { + if(FileUtils::isOrUnder(i.value(), path)) { +// if(i.value().startsWith(path + "/")) { // qDebug() << "remove path: " << i.value(); inotify_rm_watch(m_inotifyFd, currentPath.key(path)); currentPath.erase(i++); diff --git a/libsearch/index/pending-file-queue.cpp b/libsearch/index/pending-file-queue.cpp index ab45da3..47a93c9 100644 --- a/libsearch/index/pending-file-queue.cpp +++ b/libsearch/index/pending-file-queue.cpp @@ -18,6 +18,7 @@ * */ #include "pending-file-queue.h" +#include "file-utils.h" #include using namespace Zeeker; static PendingFileQueue *global_instance_pending_file_queue = nullptr; @@ -88,7 +89,7 @@ void PendingFileQueue::enqueue(const PendingFile &file) // Because our datebase need to delete those indexs one by one. if(file.shouldRemoveIndex() && file.isDir()) { const auto keepFile = [&file](const PendingFile& pending) { - return (!pending.path().startsWith(file.path()) || pending.shouldRemoveIndex()); + return (!FileUtils::isOrUnder(pending.path(), file.path()) || pending.shouldRemoveIndex()); }; const auto end = m_cache.end(); const auto droppedFilesBegin = std::stable_partition(m_cache.begin(), end, keepFile); diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index dc5585b..c501594 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -81,7 +81,7 @@ void SearchManager::onKeywordSearch(QString keyword, QQueue *searchResu bool SearchManager::isBlocked(QString &path) { QStringList blockList = GlobalSettings::getInstance()->getBlockDirs(); for(QString i : blockList) { - if(path.startsWith(i.prepend("/"))) + if(FileUtils::isOrUnder(path, i)) return true; } return false; @@ -414,7 +414,7 @@ void DirectSearch::run() { bool findIndex = false; for (QString j : blockList) { - if (i.absoluteFilePath().startsWith(j.prepend("/"))) { + if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) { findIndex = true; break; } From 7f04d303f21f26e7e21bce92fcc31a3145795ea0 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Wed, 7 Jul 2021 10:23:59 +0800 Subject: [PATCH 17/20] Update file-utils.cpp/isOrUnder method. --- libsearch/file-utils.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 31f1f69..933af51 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -180,18 +180,15 @@ QString FileUtils::getSettingName(const QString& setting) { bool FileUtils::isOrUnder(QString pathA, QString pathB) { - if(!pathA.startsWith("/")) + if(pathA[0] != "/") pathA.prepend("/"); - if(!pathB.startsWith("/")) + if(pathA[0] != "/") pathB.prepend("/"); - if(pathA == pathB) - return true; - - if(pathA.length() > pathB.length()) + if(pathA.length() < pathB.length()) return false; - if(pathA.startsWith(pathB + "/")) + if(pathA == pathB || pathA.startsWith(pathB + "/")) return true; return false; From ff62a1e2b976a4a04eb46cb49fb8b59508327ab3 Mon Sep 17 00:00:00 2001 From: jixiaoxu Date: Wed, 7 Jul 2021 11:37:00 +0800 Subject: [PATCH 18/20] Merge DAG and DP code --- .../chinese-segmentation.cpp | 2 - libchinese-segmentation/cppjieba/DatTrie.hpp | 117 +++++++++++++++++- libchinese-segmentation/cppjieba/DictTrie.hpp | 7 ++ .../cppjieba/HMMSegment.hpp | 8 +- .../cppjieba/MPSegment.hpp | 39 +++++- .../cppjieba/MixSegment.hpp | 107 +++++++++------- .../cppjieba/PreFilter.hpp | 10 +- src/src.pro | 2 +- 8 files changed, 228 insertions(+), 64 deletions(-) diff --git a/libchinese-segmentation/chinese-segmentation.cpp b/libchinese-segmentation/chinese-segmentation.cpp index fe4e95f..3b6f04c 100644 --- a/libchinese-segmentation/chinese-segmentation.cpp +++ b/libchinese-segmentation/chinese-segmentation.cpp @@ -66,8 +66,6 @@ QVector ChineseSegmentation::callSegement(std::string s) { keywordres.clear(); // keywordres.shrink_to_fit(); - - return vecNeeds; } diff --git a/libchinese-segmentation/cppjieba/DatTrie.hpp b/libchinese-segmentation/cppjieba/DatTrie.hpp index a4967a3..0709a4f 100644 --- a/libchinese-segmentation/cppjieba/DatTrie.hpp +++ b/libchinese-segmentation/cppjieba/DatTrie.hpp @@ -167,6 +167,121 @@ public: } } + void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, + vector&res, size_t max_word_len) const { + + res.clear(); + res.resize(end - begin); + + string text_str; + EncodeRunesToString(begin, end, text_str); + + static const size_t max_num = 128; + JiebaDAT::result_pair_type result_pairs[max_num] = {}; + + size_t str_size = end - begin; + for (size_t i = 0, begin_pos = text_str.size(); i < str_size; i++) { + + begin_pos -= (end - i - 1)->len; + std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); + res[str_size - i - 1].nexts.push_back(pair(str_size - i, nullptr)); + + for (std::size_t idx = 0; idx < num_results; ++idx) { + auto & match = result_pairs[idx]; + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { + continue; + } + + auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length); + + if (char_num > max_word_len) { + continue; + } + + auto pValue = &elements_ptr_[match.value]; + + if (1 == char_num) { + res[str_size - i - 1].nexts[0].second = pValue; + continue; + } + + res[str_size - i - 1].nexts.push_back(pair(str_size - 1 - i + char_num, pValue)); + } + } + } + void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, + vector& words, size_t max_word_len) const { + + string text_str; + EncodeRunesToString(begin, end, text_str); + + static const size_t max_num = 128; + JiebaDAT::result_pair_type result_pairs[max_num] = {};//存放字典查询结果 + size_t str_size = end - begin; + double max_weight[str_size];//存放逆向路径最大weight + for (size_t i = 0; ilen; + + std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num); + if (0 == num_results) {//字典不存在则单独分词 + val = min_weight_; + + if (nextPos < str_size) { + val += max_weight[nextPos]; + } + if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos; + } + } else {//字典存在则根据查询结果数量计算最大概率路径 + for (std::size_t idx = 0; idx < num_results; ++idx) { + auto & match = result_pairs[idx]; + if ((match.value < 0) || ((size_t)match.value >= elements_num_)) { + continue; + } + auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length); + if (char_num > max_word_len) { + continue; + } + auto pValue = &elements_ptr_[match.value]; + + val = pValue->weight; + if (1 == char_num) { + if (nextPos < str_size) { + val += max_weight[nextPos]; + } + if ((nextPos <= str_size) && (val > max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos; + } + } else { + if (nextPos - 1 + char_num < str_size) { + val += max_weight[nextPos - 1 + char_num]; + } + if ((nextPos - 1 + char_num <= str_size) && (val > max_weight[nextPos - 1])) { + max_weight[nextPos - 1] = val; + max_next[nextPos - 1] = nextPos - 1 + char_num; + } + } + } + } + } + for (size_t i = 0; i < str_size;) {//统计动态规划结果 + assert(max_next[i] > i); + assert(max_next[i] <= str_size); + WordRange wr(begin + i, begin + max_next[i] - 1); + words.push_back(wr); + i = max_next[i]; + } + } double GetMinWeight() const { return min_weight_; } @@ -284,7 +399,7 @@ private: //const int fd =::mkstemp(&tmp_filepath[0]); //原mkstemp用法有误,已修复--jxx20210519 const int fd =::mkstemp((char *)tmp_filepath.data()); - qDebug() << "mkstemp error:" << errno << tmp_filepath.data(); + qDebug() << "mkstemp :" << errno << tmp_filepath.data(); assert(fd >= 0); ::fchmod(fd, 0644); diff --git a/libchinese-segmentation/cppjieba/DictTrie.hpp b/libchinese-segmentation/cppjieba/DictTrie.hpp index 5ecee54..44a6cb9 100644 --- a/libchinese-segmentation/cppjieba/DictTrie.hpp +++ b/libchinese-segmentation/cppjieba/DictTrie.hpp @@ -49,6 +49,13 @@ public: dat_.Find(begin, end, res, max_word_len); } + void Find(RuneStrArray::const_iterator begin, + RuneStrArray::const_iterator end, + vector& words, + size_t max_word_len = MAX_WORD_LENGTH) const { + dat_.Find(begin, end, words, max_word_len); + } + bool IsUserDictSingleChineseWord(const Rune& word) const { return IsIn(user_dict_single_chinese_word_, word); } diff --git a/libchinese-segmentation/cppjieba/HMMSegment.hpp b/libchinese-segmentation/cppjieba/HMMSegment.hpp index 30af449..1a9937b 100644 --- a/libchinese-segmentation/cppjieba/HMMSegment.hpp +++ b/libchinese-segmentation/cppjieba/HMMSegment.hpp @@ -138,10 +138,10 @@ private: size_t now, old, stat; double tmp, endE, endS; - vector path(XYSize); - vector weight(XYSize); - //int path[XYSize]; - //double weight[XYSize]; + //vector path(XYSize); + //vector weight(XYSize); + int path[XYSize]; + double weight[XYSize]; //start for (size_t y = 0; y < Y; y++) { diff --git a/libchinese-segmentation/cppjieba/MPSegment.hpp b/libchinese-segmentation/cppjieba/MPSegment.hpp index d615fe2..0158e4a 100644 --- a/libchinese-segmentation/cppjieba/MPSegment.hpp +++ b/libchinese-segmentation/cppjieba/MPSegment.hpp @@ -22,10 +22,11 @@ public: RuneStrArray::const_iterator end, vector& words, bool, size_t max_word_len) const override { - vector dags; - dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx - CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx - CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx +// vector dags; +// dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx +// CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx +// CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx + dictTrie_->Find(begin, end, words, max_word_len); } virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector& res, bool hmm, @@ -48,6 +49,7 @@ public: return dictTrie_->IsUserDictSingleChineseWord(value); } private: +/* void CalcDP(vector& dags) const { double val(0); for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) { @@ -73,6 +75,35 @@ private: } } } +*/ +/* 倒叙方式重写CalcDP函数,初步测试未发现问题*/ + void CalcDP(vector& dags) const { + double val(0); + size_t size = dags.size(); + + for (size_t i = 0; i < size; i++) { + dags[size - 1 - i].max_next = -1; + dags[size - 1 - i].max_weight = MIN_DOUBLE; + + for (const auto & it : dags[size - 1 - i].nexts) { + const auto nextPos = it.first; + val = dictTrie_->GetMinWeight(); + + if (nullptr != it.second) { + val = it.second->weight; + } + + if (nextPos < dags.size()) { + val += dags[nextPos].max_weight; + } + + if ((nextPos <= dags.size()) && (val > dags[size - 1 - i].max_weight)) { + dags[size - 1 - i].max_weight = val; + dags[size - 1 - i].max_next = nextPos; + } + } + } + } void CutByDag(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator, diff --git a/libchinese-segmentation/cppjieba/MixSegment.hpp b/libchinese-segmentation/cppjieba/MixSegment.hpp index a539039..9e67069 100644 --- a/libchinese-segmentation/cppjieba/MixSegment.hpp +++ b/libchinese-segmentation/cppjieba/MixSegment.hpp @@ -123,65 +123,76 @@ public: virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map& res, bool hmm, size_t) const override { vector words; - assert(end >= begin); - words.reserve(end - begin); - mpSeg_.CutRuneArray(begin, end, words); - vector hmmRes; - hmmRes.reserve(end - begin); + assert(end >= begin); + if (3 == begin->len or 4 == begin->len) { + words.reserve(end - begin); + mpSeg_.CutRuneArray(begin, end, words); + hmmRes.reserve(words.size()); + } else { + hmmRes.reserve(end - begin); + } - for (size_t i = 0; i < words.size(); i++) { + if (words.size() != 0) {//存在中文分词结果 + for (size_t i = 0; i < words.size(); i++) { - string str = GetStringFromRunes(s, words[i].left, words[i].right); + string str = GetStringFromRunes(s, words[i].left, words[i].right); - if (stopWords_.find(str) != stopWords_.end()) { - continue; - } - - if (words[i].left != words[i].right) { - res[str].offsets.push_back(words[i].left->offset); - res[str].weight += 1.0; - continue; - } - if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune) - || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back if (stopWords_.find(str) != stopWords_.end()) { continue; } - res[str].offsets.push_back(words[i].left->offset); - res[str].weight += 1.0; - continue; - } - // if mp Get a single one and it is not in userdict, collect it in sequence - size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符 - - while (j < (words.size() - 1) - && words[j].left == words[j].right - && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { - j++; - } - - // Cut the sequence with hmm - assert(j - 1 >= i); - // TODO - hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes); - - //put hmm result to result - for (size_t k = 0; k < hmmRes.size(); k++) { - string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right); - if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) { + if (words[i].left != words[i].right) { + res[str].offsets.push_back(words[i].left->offset); + res[str].weight += 1.0; continue; } - res[hmmStr].offsets.push_back(hmmRes[k].left->offset); - res[hmmStr].weight += 1.0; + if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune) + || i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back + if (stopWords_.find(str) != stopWords_.end()) { + continue; + } + res[str].offsets.push_back(words[i].left->offset); + res[str].weight += 1.0; + continue; + } + + // if mp Get a single one and it is not in userdict, collect it in sequence + size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符 + + while (j < (words.size() - 1) + && words[j].left == words[j].right + && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { + j++; + } + + // Cut the sequence with hmm + assert(j - 1 >= i); + // TODO + hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes); + + //put hmm result to result + for (size_t k = 0; k < hmmRes.size(); k++) { + string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right); + if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) { + continue; + } + res[hmmStr].offsets.push_back(hmmRes[k].left->offset); + res[hmmStr].weight += 1.0; + } + + //clear tmp vars + hmmRes.clear(); + + //let i jump over this piece + i = j - 1; + } + } else {//不存在中文分词结果 + for (size_t i = 0; i < (size_t)(end - begin); i++) { + string str = s.substr((begin+i)->offset, (begin+i)->len); + res[str].offsets.push_back((begin+i)->offset); + res[str].weight += 1.0; } - - //clear tmp vars - hmmRes.clear(); - - //let i jump over this piece - i = j - 1; } } diff --git a/libchinese-segmentation/cppjieba/PreFilter.hpp b/libchinese-segmentation/cppjieba/PreFilter.hpp index 3f04dcf..2dd30dd 100644 --- a/libchinese-segmentation/cppjieba/PreFilter.hpp +++ b/libchinese-segmentation/cppjieba/PreFilter.hpp @@ -57,7 +57,6 @@ public: } wordRange.left = cursor_; - if (cursor_->rune == 0x20) { while (cursor_ != sentence_.end()) { if (cursor_->rune != 0x20) { @@ -71,7 +70,10 @@ public: cursor_ ++; } } - int num = 0; + + int max_num = 0; + uint32_t utf8_num = cursor_->len; + while (cursor_ != sentence_.end()) { if (cursor_->rune == 0x20) { if (wordRange.left == cursor_) { @@ -83,8 +85,8 @@ public: } cursor_ ++; - num++; - if (num >= 1024) { //todo 防止一次性传入过多字节,暂定限制为1024个字 + max_num++; + if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节,暂定限制为1024个字 wordRange.right = cursor_; return true; } diff --git a/src/src.pro b/src/src.pro index ff309e5..f61b5a7 100644 --- a/src/src.pro +++ b/src/src.pro @@ -9,7 +9,7 @@ TEMPLATE = app PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 CONFIG += c++11 link_pkgconfig no_keywords lrelease LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11 -#LIBS += -lukui-log4qt -L/usr/local/lib/libjemalloc -ljemalloc +LIBS += -lukui-log4qt #-L/usr/local/lib/libjemalloc -ljemalloc # The following define makes your compiler emit warnings if you use # any Qt feature that has been marked deprecated (the exact warnings # depend on your compiler). Please consult the documentation of the From ee3060f30fb72ba5bea5f77826b132fb2e699d70 Mon Sep 17 00:00:00 2001 From: Zhai Kangning Date: Wed, 7 Jul 2021 06:20:22 +0000 Subject: [PATCH 19/20] Update file-utils.cpp --- libsearch/file-utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 933af51..113fbf5 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -182,7 +182,7 @@ bool FileUtils::isOrUnder(QString pathA, QString pathB) { if(pathA[0] != "/") pathA.prepend("/"); - if(pathA[0] != "/") + if(pathB[0] != "/") pathB.prepend("/"); if(pathA.length() < pathB.length()) From a152d54f5c81ebc5ac8abfcde15a2b436b28a915 Mon Sep 17 00:00:00 2001 From: iaom <18504285112@163.com> Date: Sat, 10 Jul 2021 10:50:40 +0800 Subject: [PATCH 20/20] =?UTF-8?q?Update=20changelog=20ukui-search0.4.0+070?= =?UTF-8?q?9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debian/changelog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/debian/changelog b/debian/changelog index ac33d6d..16d0351 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,16 @@ +ukui-search (0.4.0+0709) v101; urgency=medium + + * Bug 无 + * 任务号:无 + * 其他改动: + * Fix: Path inclusive relation judgment incorrectly. + -修复了由于目录包含关系判断不当导致的一系列问题(黑名单屏蔽错误等)。 + * Merge DAG and DP code; Preprocessing text content distinguish Chinese from + others. + - 优化关键词提取流程,缩短了一些索引所需的时间. + + -- zhangpengfei Fri, 09 Jul 2021 14:43:14 +0800 + ukui-search (0.4.0+0703) v101; urgency=medium * Bug 无