forked from openkylin/ukui-search
Merge from main.
This commit is contained in:
commit
dc30f444d4
15
README.md
15
README.md
|
@ -1,4 +1,17 @@
|
||||||
# ukui-search
|
# ukui-search
|
||||||
|
|
||||||
[WIP] UKUI Search is a user-wide desktop search feature of UKUI desktop environment.
|
[WIP] UKUI Search is a user-wide desktop search feature of the UKUI desktop environment.
|
||||||
|
|
||||||
|
Build from source
|
||||||
|
|
||||||
|
|
||||||
|
git clone https://github.com/ukui/ukui-search.git
|
||||||
|
|
||||||
|
cd ukui-search && mkdir build && cd build
|
||||||
|
|
||||||
|
qmake .. && make
|
||||||
|
|
||||||
|
sudo make install
|
||||||
|
|
||||||
|
/usr/bin/ukui-search
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,70 @@
|
||||||
|
ukui-search (0.4.0+0709) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 任务号:无
|
||||||
|
* 其他改动:
|
||||||
|
* Fix: Path inclusion relationship was judged incorrectly.
|
||||||
|
-修复了由于目录包含关系判断不当导致的一系列问题(黑名单屏蔽错误等)。
|
||||||
|
* Merge DAG and DP code; Preprocessing text content distinguish Chinese from
|
||||||
|
others.
|
||||||
|
- 优化关键词提取流程,缩短了一些索引所需的时间.
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Fri, 09 Jul 2021 14:43:14 +0800
|
||||||
|
|
||||||
|
ukui-search (0.4.0+0703) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 任务号:无
|
||||||
|
* 其他改动:
|
||||||
|
* Fix: Creating the FIFO sometimes failed.
|
||||||
|
- 修复了在开关索引时偶现的由于创建管道失败导致的崩溃问题。
|
||||||
|
* Remove entry from ukui-menu.
|
||||||
|
- 移除了开始菜单入口(开始菜单里的搜索应用显示)。
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Sat, 03 Jul 2021 10:13:23 +0800
|
||||||
|
|
||||||
|
ukui-search (0.4.0+0630) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 任务号:无
|
||||||
|
* 其他改动:
|
||||||
|
* Add a system-bus interface to raise inotify_max_user_instance, avoiding inotify_init failures.
|
||||||
|
- 增加修改inotify_max_user_instance配置的dbus接口,避免由于超出最大数量导致的inotify_init失败问题。
|
||||||
|
* Fix: Detail page display incorrectly occasionally.
|
||||||
|
- 修复了偶现的点击最佳列表,右侧详情显示错误的问题。
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Wed, 30 Jun 2021 11:38:31 +0800
|
||||||
|
|
||||||
|
ukui-search (0.4.0+0629) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 任务号:无
|
||||||
|
* 其他改动:
|
||||||
|
* Fix:App search error when display applications of the same name.
|
||||||
|
- 修复了当存在重名应用时应用搜索显示错误的问题。
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Tue, 29 Jun 2021 11:19:25 +0800
|
||||||
|
|
||||||
|
ukui-search (0.4.0+0628) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 任务号:41543
|
||||||
|
* 其他改动:
|
||||||
|
* Fix: Tray icon click won't work after Win+D.
|
||||||
|
-修复了在弹出建立索引提示弹窗后按WIN+D之后,点击任务栏托盘无法呼出页面的问题。
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Mon, 28 Jun 2021 09:35:15 +0800
|
||||||
|
|
||||||
|
ukui-search (0.4.0+0619) v101; urgency=medium
|
||||||
|
|
||||||
|
* Bug 无
|
||||||
|
* 需求6732,6733,6734,6938
|
||||||
|
* 其他改动:
|
||||||
|
* Add inotify events queue for merging events,reduce disk io operations.
|
||||||
|
- 增加inotify信号合并缓冲队列,减少90%以上磁盘io操作。
|
||||||
|
|
||||||
|
-- zhangpengfei <zhangpengfei@kylinos.cn> Sat, 19 Jun 2021 09:12:10 +0800
|
||||||
|
|
||||||
ukui-search (0.4.0+0612) v101; urgency=medium
|
ukui-search (0.4.0+0612) v101; urgency=medium
|
||||||
|
|
||||||
* Bug 无
|
* Bug 无
|
||||||
|
|
|
@ -66,8 +66,6 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
|
||||||
|
|
||||||
keywordres.clear();
|
keywordres.clear();
|
||||||
// keywordres.shrink_to_fit();
|
// keywordres.shrink_to_fit();
|
||||||
|
|
||||||
|
|
||||||
return vecNeeds;
|
return vecNeeds;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,19 @@ struct DatElement {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// One entry of the IDF dictionary: a word and its inverse-document-frequency.
struct IdfElement {
    std::string word;
    double idf = 0;

    /// Orders entries by word (ascending); entries with the same word are
    /// ordered by idf, larger value first.
    bool operator < (const IdfElement & b) const {
        if (word != b.word) {
            return word < b.word;
        }

        return idf > b.idf;
    }
};
|
||||||
|
|
||||||
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
|
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
|
||||||
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
|
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
|
||||||
}
|
}
|
||||||
|
@ -91,13 +104,24 @@ public:
|
||||||
JiebaDAT::result_pair_type find_result;
|
JiebaDAT::result_pair_type find_result;
|
||||||
dat_.exactMatchSearch(key.c_str(), find_result);
|
dat_.exactMatchSearch(key.c_str(), find_result);
|
||||||
|
|
||||||
if ((0 == find_result.length) || (find_result.value < 0) || (find_result.value >= elements_num_)) {
|
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return &elements_ptr_[ find_result.value ];
|
return &elements_ptr_[ find_result.value ];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
|
||||||
|
JiebaDAT::result_pair_type find_result;
|
||||||
|
dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
|
||||||
|
|
||||||
|
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return idf_elements_ptr_[ find_result.value ];
|
||||||
|
}
|
||||||
|
|
||||||
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
|
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
|
||||||
vector<struct DatDag>&res, size_t max_word_len) const {
|
vector<struct DatDag>&res, size_t max_word_len) const {
|
||||||
|
|
||||||
|
@ -119,7 +143,7 @@ public:
|
||||||
for (std::size_t idx = 0; idx < num_results; ++idx) {
|
for (std::size_t idx = 0; idx < num_results; ++idx) {
|
||||||
auto & match = result_pairs[idx];
|
auto & match = result_pairs[idx];
|
||||||
|
|
||||||
if ((match.value < 0) || (match.value >= elements_num_)) {
|
if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,6 +167,121 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the word graph for [begin, end), visiting runes from last to first.
 *
 * After the call `res` has one entry per rune. `res[p].nexts[0]` is always the
 * single-rune edge (p + 1); its element pointer is null unless that single
 * rune is itself a dictionary word. Every further entry is a dictionary word
 * of 2..max_word_len runes starting at p, stored as (end position, element).
 *
 * @param begin        first rune of the text
 * @param end          one past the last rune of the text
 * @param res          output graph; cleared and resized to the rune count
 * @param max_word_len longest word, in runes, that may be added
 */
void Find_Reverse(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
                  vector<struct DatDag>&res, size_t max_word_len) const {
    res.clear();
    res.resize(end - begin);

    string text_str;
    EncodeRunesToString(begin, end, text_str);

    static const size_t max_num = 128;
    JiebaDAT::result_pair_type result_pairs[max_num] = {};

    const size_t str_size = end - begin;
    size_t begin_pos = text_str.size();

    for (size_t pos = str_size; pos-- > 0;) {
        // NOTE(review): assumes RuneStr::len equals the byte length of the
        // rune as re-encoded into text_str - confirm for invalid input.
        begin_pos -= (begin + pos)->len;

        std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);

        // The search may report more matches than it was able to store;
        // never read past the buffer.
        if (num_results > max_num) {
            num_results = max_num;
        }

        auto & nexts = res[pos].nexts;
        nexts.push_back(pair<size_t, const DatMemElem *>(pos + 1, nullptr));

        for (std::size_t idx = 0; idx < num_results; ++idx) {
            const auto & match = result_pairs[idx];

            if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                continue;
            }

            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);

            if (char_num > max_word_len) {
                continue;
            }

            auto pValue = &elements_ptr_[match.value];

            if (1 == char_num) {
                // The single rune is a dictionary word: attach its element
                // to the edge that was pushed above.
                nexts[0].second = pValue;
                continue;
            }

            nexts.push_back(pair<size_t, const DatMemElem *>(pos + char_num, pValue));
        }
    }
}
|
||||||
|
/**
 * Segments [begin, end) into the maximum-weight sequence of words and appends
 * the resulting ranges to `words`.
 *
 * Dictionary lookup and dynamic programming are done in one backwards pass:
 * for each rune position, from last to first, the best word starting there is
 * chosen from the dictionary matches, using the already computed best weights
 * of the positions behind it. A position with no usable dictionary match
 * becomes a single-rune word weighted with `min_weight_`.
 *
 * @param begin        first rune of the text
 * @param end          one past the last rune of the text
 * @param words        output; one WordRange (inclusive left/right) per word
 * @param max_word_len longest dictionary word, in runes, that may be used
 */
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
          vector<WordRange>& words, size_t max_word_len) const {
    const size_t str_size = end - begin;

    if (0 == str_size) {
        return;
    }

    string text_str;
    EncodeRunesToString(begin, end, text_str);

    static const size_t max_num = 128;
    JiebaDAT::result_pair_type result_pairs[max_num] = {}; // dictionary lookup results

    // Best total weight of the text starting at each position.
    vector<double> max_weight(str_size, -3.14e+100);
    // End position (exclusive) of the best word starting at each position.
    // A real end position is always >= 1, so 0 means "not decided yet".
    vector<size_t> max_next(str_size, 0);

    size_t begin_pos = text_str.size();

    for (size_t pos = str_size; pos-- > 0;) { // walk backwards
        // NOTE(review): assumes RuneStr::len equals the byte length of the
        // rune as re-encoded into text_str - confirm for invalid input.
        begin_pos -= (begin + pos)->len;

        std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);

        // The search may report more matches than it was able to store.
        if (num_results > max_num) {
            num_results = max_num;
        }

        for (std::size_t idx = 0; idx < num_results; ++idx) {
            const auto & match = result_pairs[idx];

            if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
                continue;
            }

            auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);

            if (char_num > max_word_len) {
                continue;
            }

            const size_t next = pos + char_num;

            // A word must advance and must not run past the text.
            if ((next <= pos) || (next > str_size)) {
                continue;
            }

            double val = elements_ptr_[match.value].weight;

            if (next < str_size) {
                val += max_weight[next];
            }

            if (val > max_weight[pos]) {
                max_weight[pos] = val;
                max_next[pos] = next;
            }
        }

        // Not in the dictionary, or every match was rejected: emit the rune
        // on its own so the result loop below always makes progress.
        if (0 == max_next[pos]) {
            double val = min_weight_;

            if (pos + 1 < str_size) {
                val += max_weight[pos + 1];
            }

            max_weight[pos] = val;
            max_next[pos] = pos + 1;
        }
    }

    // Follow the chosen path from the front and collect the words.
    for (size_t i = 0; i < str_size; i = max_next[i]) {
        assert(max_next[i] > i);
        assert(max_next[i] <= str_size);
        words.push_back(WordRange(begin + i, begin + max_next[i] - 1));
    }
}
|
||||||
double GetMinWeight() const {
|
double GetMinWeight() const {
|
||||||
return min_weight_;
|
return min_weight_;
|
||||||
}
|
}
|
||||||
|
@ -156,6 +295,11 @@ public:
|
||||||
return InitAttachDat(dat_cache_file, md5);
|
return InitAttachDat(dat_cache_file, md5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Writes an IDF cache file for `elements` and then attaches to it.
 *
 * @param elements       IDF entries; sorted in place by BuildDatCache()
 * @param dat_cache_file path of the cache file to create
 * @param md5            digest stored in the cache header and checked on attach
 * @return the result of InitIdfAttachDat() on the freshly written file
 */
bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    BuildDatCache(elements, dat_cache_file, md5);

    const bool attached = InitIdfAttachDat(dat_cache_file, md5);
    return attached;
}
|
||||||
|
|
||||||
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
|
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
|
||||||
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
||||||
|
|
||||||
|
@ -187,6 +331,37 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
|
||||||
|
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
||||||
|
|
||||||
|
if (mmap_fd_ < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
|
||||||
|
assert(seek_off >= 0);
|
||||||
|
mmap_length_ = seek_off;
|
||||||
|
|
||||||
|
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
|
||||||
|
assert(MAP_FAILED != mmap_addr_);
|
||||||
|
|
||||||
|
assert(mmap_length_ >= sizeof(CacheFileHeader));
|
||||||
|
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
|
||||||
|
elements_num_ = header.elements_num;
|
||||||
|
min_weight_ = header.min_weight;
|
||||||
|
assert(sizeof(header.md5_hex) == md5.size());
|
||||||
|
|
||||||
|
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size());
|
||||||
|
idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
|
||||||
|
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
|
||||||
|
dat_.set_array(dat_ptr, header.dat_size);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
|
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
|
||||||
std::sort(elements.begin(), elements.end());
|
std::sort(elements.begin(), elements.end());
|
||||||
|
@ -224,7 +399,7 @@ private:
|
||||||
//const int fd =::mkstemp(&tmp_filepath[0]);
|
//const int fd =::mkstemp(&tmp_filepath[0]);
|
||||||
//原mkstemp用法有误,已修复--jxx20210519
|
//原mkstemp用法有误,已修复--jxx20210519
|
||||||
const int fd =::mkstemp((char *)tmp_filepath.data());
|
const int fd =::mkstemp((char *)tmp_filepath.data());
|
||||||
qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
|
qDebug() << "mkstemp :" << errno << tmp_filepath.data();
|
||||||
assert(fd >= 0);
|
assert(fd >= 0);
|
||||||
::fchmod(fd, 0644);
|
::fchmod(fd, 0644);
|
||||||
|
|
||||||
|
@ -240,12 +415,62 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the double-array trie for an IDF dictionary and writes it, together
 * with the IDF values, to `dat_cache_file`.
 *
 * The data is written to a temporary file next to the target and renamed
 * into place, so the target is either absent or complete. If the temporary
 * file cannot be created or fully written, nothing is renamed.
 *
 * File layout: CacheFileHeader, then one double per element (its idf, in
 * sorted order), then the double-array data.
 *
 * @param elements       IDF entries; sorted in place
 * @param dat_cache_file path of the cache file to create
 * @param md5            digest stored in the header; must be exactly
 *                       sizeof(CacheFileHeader::md5_hex) bytes long
 */
void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    std::sort(elements.begin(), elements.end());

    const size_t count = elements.size();
    vector<const char*> keys_ptr_vec;
    vector<int> values_vec;
    vector<double> mem_elem_vec;

    keys_ptr_vec.reserve(count);
    values_vec.reserve(count);
    mem_elem_vec.reserve(count);

    CacheFileHeader header;
    header.min_weight = min_weight_;
    assert(sizeof(header.md5_hex) == md5.size());
    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());

    // The value stored in the trie for a key is its index into the IDF array.
    for (size_t i = 0; i < count; ++i) {
        keys_ptr_vec.push_back(elements[i].word.data());
        values_vec.push_back(static_cast<int>(i));
        mem_elem_vec.push_back(elements[i].idf);
    }

    auto const ret = dat_.build(keys_ptr_vec.size(), keys_ptr_vec.data(), NULL, values_vec.data());
    assert(0 == ret);
    (void)ret;
    header.elements_num = mem_elem_vec.size();
    header.dat_size = dat_.size();

    string tmp_filepath = string(dat_cache_file) + "_XXXXXX";

    // Restrict the mask only while the file is created; the process-wide
    // umask is put back afterwards.
    const auto previous_umask = ::umask(S_IWGRP | S_IWOTH);
    const int fd = ::mkstemp(&tmp_filepath[0]);
    const int mkstemp_errno = errno;
    ::umask(previous_umask);

    if (fd < 0) {
        qDebug() << "mkstemp error:" << mkstemp_errno << tmp_filepath.data();
    }

    assert(fd >= 0);

    if (fd < 0) {
        return;
    }

    ::fchmod(fd, 0644);

    auto write_fully = [fd](const void * data, size_t size) {
        return ::write(fd, data, size) == static_cast<ssize_t>(size);
    };

    const bool complete = write_fully(&header, sizeof(header))
                          && write_fully(mem_elem_vec.data(), sizeof(double) * mem_elem_vec.size())
                          && write_fully(dat_.array(), dat_.total_size());
    ::close(fd);
    assert(complete);

    if (!complete) {
        // Never publish a truncated cache.
        ::unlink(tmp_filepath.c_str());
        return;
    }

    const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
    assert(0 == rename_ret);
    (void)rename_ret;
}
|
||||||
|
|
||||||
DatTrie(const DatTrie &);
|
DatTrie(const DatTrie &);
|
||||||
DatTrie &operator=(const DatTrie &);
|
DatTrie &operator=(const DatTrie &);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
JiebaDAT dat_;
|
JiebaDAT dat_;
|
||||||
const DatMemElem * elements_ptr_ = nullptr;
|
const DatMemElem * elements_ptr_ = nullptr;
|
||||||
|
const double * idf_elements_ptr_= nullptr;
|
||||||
size_t elements_num_ = 0;
|
size_t elements_num_ = 0;
|
||||||
double min_weight_ = 0;
|
double min_weight_ = 0;
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,13 @@ public:
|
||||||
dat_.Find(begin, end, res, max_word_len);
|
dat_.Find(begin, end, res, max_word_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Segments [begin, end) into words by forwarding to the WordRange overload of
 * the underlying trie's Find().
 *
 * @param begin        first rune of the text
 * @param end          one past the last rune of the text
 * @param words        output; the trie appends one WordRange per word
 * @param max_word_len longest word, in runes, that may be produced
 */
void Find(RuneStrArray::const_iterator begin,
          RuneStrArray::const_iterator end,
          vector<WordRange>& words,
          size_t max_word_len = MAX_WORD_LENGTH) const {
    dat_.Find(begin, end, words, max_word_len);
}
|
||||||
|
|
||||||
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||||
return IsIn(user_dict_single_chinese_word_, word);
|
return IsIn(user_dict_single_chinese_word_, word);
|
||||||
}
|
}
|
||||||
|
@ -130,7 +137,7 @@ private:
|
||||||
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
||||||
}
|
}
|
||||||
QString path = QString::fromStdString(dat_cache_path);
|
QString path = QString::fromStdString(dat_cache_path);
|
||||||
qDebug() << "#########path:" << path;
|
qDebug() << "#########Dict path:" << path;
|
||||||
if (dat_.InitAttachDat(dat_cache_path, md5)) {
|
if (dat_.InitAttachDat(dat_cache_path, md5)) {
|
||||||
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
|
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
|
||||||
total_dict_size_ = file_size_sum;
|
total_dict_size_ = file_size_sum;
|
||||||
|
|
|
@ -0,0 +1,134 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <cmath>
|
||||||
|
#include <limits>
|
||||||
|
#include "limonp/StringUtil.hpp"
|
||||||
|
#include "limonp/Logging.hpp"
|
||||||
|
#include "Unicode.hpp"
|
||||||
|
#include "DatTrie.hpp"
|
||||||
|
#include <QDebug>
|
||||||
|
namespace cppjieba {
|
||||||
|
|
||||||
|
using namespace limonp;
|
||||||
|
|
||||||
|
const size_t IDF_COLUMN_NUM = 2;
|
||||||
|
|
||||||
|
class IdfTrie {
|
||||||
|
public:
|
||||||
|
enum UserWordWeightOption {
|
||||||
|
WordWeightMin,
|
||||||
|
WordWeightMedian,
|
||||||
|
WordWeightMax,
|
||||||
|
}; // enum UserWordWeightOption
|
||||||
|
|
||||||
|
IdfTrie(const string& dict_path, const string & dat_cache_path = "",
|
||||||
|
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||||
|
Init(dict_path, dat_cache_path, user_word_weight_opt);
|
||||||
|
}
|
||||||
|
|
||||||
|
~IdfTrie() {}
|
||||||
|
|
||||||
|
double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
|
||||||
|
return dat_.Find(word, length, node_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Find(RuneStrArray::const_iterator begin,
|
||||||
|
RuneStrArray::const_iterator end,
|
||||||
|
vector<struct DatDag>&res,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
dat_.Find(begin, end, res, max_word_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||||
|
return IsIn(user_dict_single_chinese_word_, word);
|
||||||
|
}
|
||||||
|
|
||||||
|
double GetMinWeight() const {
|
||||||
|
return dat_.GetMinWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetTotalDictSize() const {
|
||||||
|
return total_dict_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Init(const string& dict_path, string dat_cache_path,
|
||||||
|
UserWordWeightOption user_word_weight_opt) {
|
||||||
|
size_t file_size_sum = 0;
|
||||||
|
const string md5 = CalcFileListMD5(dict_path, file_size_sum);
|
||||||
|
|
||||||
|
if (dat_cache_path.empty()) {
|
||||||
|
//未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
|
||||||
|
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
||||||
|
}
|
||||||
|
QString path = QString::fromStdString(dat_cache_path);
|
||||||
|
qDebug() << "#########Idf path:" << path;
|
||||||
|
if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
|
||||||
|
total_dict_size_ = file_size_sum;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LoadDefaultIdf(dict_path);
|
||||||
|
double idf_sum_ = CalcIdfSum(static_node_infos_);
|
||||||
|
assert(static_node_infos_.size());
|
||||||
|
idfAverage_ = idf_sum_ / static_node_infos_.size();
|
||||||
|
assert(idfAverage_ > 0.0);
|
||||||
|
double min_weight = 0;
|
||||||
|
dat_.SetMinWeight(min_weight);
|
||||||
|
|
||||||
|
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
|
||||||
|
assert(build_ret);
|
||||||
|
total_dict_size_ = file_size_sum;
|
||||||
|
vector<IdfElement>().swap(static_node_infos_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LoadDefaultIdf(const string& filePath) {
|
||||||
|
ifstream ifs(filePath.c_str());
|
||||||
|
if(not ifs.is_open()){
|
||||||
|
return ;
|
||||||
|
}
|
||||||
|
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
||||||
|
string line;
|
||||||
|
vector<string> buf;
|
||||||
|
size_t lineno = 0;
|
||||||
|
|
||||||
|
for (; getline(ifs, line); lineno++) {
|
||||||
|
if (line.empty()) {
|
||||||
|
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Split(line, buf, " ");
|
||||||
|
XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
|
||||||
|
IdfElement node_info;
|
||||||
|
node_info.word = buf[0];
|
||||||
|
node_info.idf = atof(buf[1].c_str());
|
||||||
|
static_node_infos_.push_back(node_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double CalcIdfSum(const vector<IdfElement>& node_infos) const {
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||||
|
sum += node_infos[i].idf;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
double idfAverage_;
|
||||||
|
private:
|
||||||
|
vector<IdfElement> static_node_infos_;
|
||||||
|
size_t total_dict_size_ = 0;
|
||||||
|
DatTrie dat_;
|
||||||
|
unordered_set<Rune> user_dict_single_chinese_word_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
@ -21,7 +21,7 @@ public:
|
||||||
mix_seg_(&dict_trie_, &model_, stopWordPath),
|
mix_seg_(&dict_trie_, &model_, stopWordPath),
|
||||||
full_seg_(&dict_trie_),
|
full_seg_(&dict_trie_),
|
||||||
query_seg_(&dict_trie_, &model_, stopWordPath),
|
query_seg_(&dict_trie_, &model_, stopWordPath),
|
||||||
extractor(&dict_trie_, &model_, idfPath, stopWordPath){ }
|
extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
|
||||||
~Jieba() { }
|
~Jieba() { }
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include "MixSegment.hpp"
|
#include "MixSegment.hpp"
|
||||||
|
#include "IdfTrie.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
|
|
||||||
|
@ -11,18 +12,14 @@ using namespace std;
|
||||||
/*utf8*/
|
/*utf8*/
|
||||||
class KeywordExtractor {
|
class KeywordExtractor {
|
||||||
public:
|
public:
|
||||||
// struct Word {
|
|
||||||
// string word;
|
|
||||||
// vector<size_t> offsets;
|
|
||||||
// double weight;
|
|
||||||
// }; // struct Word
|
|
||||||
|
|
||||||
KeywordExtractor(const DictTrie* dictTrie,
|
KeywordExtractor(const DictTrie* dictTrie,
|
||||||
const HMMModel* model,
|
const HMMModel* model,
|
||||||
const string& idfPath,
|
const string& idfPath,
|
||||||
|
const string& dat_cache_path,
|
||||||
const string& stopWordPath)
|
const string& stopWordPath)
|
||||||
: segment_(dictTrie, model, stopWordPath) {
|
: segment_(dictTrie, model, stopWordPath),
|
||||||
LoadIdfDict(idfPath);
|
idf_trie_(idfPath,dat_cache_path){
|
||||||
}
|
}
|
||||||
~KeywordExtractor() {
|
~KeywordExtractor() {
|
||||||
}
|
}
|
||||||
|
@ -63,12 +60,11 @@ public:
|
||||||
keywords.reserve(wordmap.size());
|
keywords.reserve(wordmap.size());
|
||||||
|
|
||||||
for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||||
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);//IDF词典查找
|
double idf = idf_trie_.Find(itr->first);
|
||||||
|
if (-1 != idf) {//IDF词典查找
|
||||||
if (cit != idfMap_.end()) {
|
itr->second.weight *= idf;
|
||||||
itr->second.weight *= cit->second;
|
|
||||||
} else {
|
} else {
|
||||||
itr->second.weight *= idfAverage_;
|
itr->second.weight *= idf_trie_.idfAverage_;
|
||||||
}
|
}
|
||||||
|
|
||||||
itr->second.word = itr->first;
|
itr->second.word = itr->first;
|
||||||
|
@ -80,51 +76,13 @@ public:
|
||||||
keywords.resize(topN);
|
keywords.resize(topN);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void LoadIdfDict(const string& idfPath) {
|
|
||||||
ifstream ifs(idfPath.c_str());
|
|
||||||
if(not ifs.is_open()){
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
|
|
||||||
string line ;
|
|
||||||
vector<string> buf;
|
|
||||||
double idf = 0.0;
|
|
||||||
double idfSum = 0.0;
|
|
||||||
size_t lineno = 0;
|
|
||||||
|
|
||||||
for (; getline(ifs, line); lineno++) {
|
|
||||||
buf.clear();
|
|
||||||
|
|
||||||
if (line.empty()) {
|
|
||||||
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
Split(line, buf, " ");
|
|
||||||
|
|
||||||
if (buf.size() != 2) {
|
|
||||||
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
idf = atof(buf[1].c_str());
|
|
||||||
idfMap_[buf[0]] = idf;
|
|
||||||
idfSum += idf;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(lineno);
|
|
||||||
idfAverage_ = idfSum / lineno;
|
|
||||||
assert(idfAverage_ > 0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
|
static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
|
||||||
return lhs.weight > rhs.weight;
|
return lhs.weight > rhs.weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
MixSegment segment_;
|
MixSegment segment_;
|
||||||
unordered_map<string, double> idfMap_;
|
IdfTrie idf_trie_;
|
||||||
double idfAverage_;
|
|
||||||
|
|
||||||
unordered_set<Rune> symbols_;
|
unordered_set<Rune> symbols_;
|
||||||
}; // class KeywordExtractor
|
}; // class KeywordExtractor
|
||||||
|
|
|
@ -22,10 +22,11 @@ public:
|
||||||
RuneStrArray::const_iterator end,
|
RuneStrArray::const_iterator end,
|
||||||
vector<WordRange>& words,
|
vector<WordRange>& words,
|
||||||
bool, size_t max_word_len) const override {
|
bool, size_t max_word_len) const override {
|
||||||
vector<DatDag> dags;
|
// vector<DatDag> dags;
|
||||||
dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
|
// dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
|
||||||
CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx
|
// CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx
|
||||||
CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
|
// CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
|
||||||
|
dictTrie_->Find(begin, end, words, max_word_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||||
|
@ -48,6 +49,7 @@ public:
|
||||||
return dictTrie_->IsUserDictSingleChineseWord(value);
|
return dictTrie_->IsUserDictSingleChineseWord(value);
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
|
/*
|
||||||
void CalcDP(vector<DatDag>& dags) const {
|
void CalcDP(vector<DatDag>& dags) const {
|
||||||
double val(0);
|
double val(0);
|
||||||
for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
||||||
|
@ -73,6 +75,35 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
/* 倒叙方式重写CalcDP函数,初步测试未发现问题*/
|
||||||
|
/**
 * Picks, for every node of the word graph, the outgoing edge that maximises
 * the total weight of the path to the end of the text.
 *
 * Nodes are processed from last to first so that the best weight of every
 * edge target is already known. An edge without a dictionary element is
 * weighted with the dictionary's minimum weight. Results are written to each
 * node's `max_weight` and `max_next`.
 */
void CalcDP(vector<DatDag>& dags) const {
    const size_t total = dags.size();

    for (size_t remaining = total; remaining > 0; --remaining) {
        auto & node = dags[remaining - 1];
        node.max_next = -1;
        node.max_weight = MIN_DOUBLE;

        for (const auto & edge : node.nexts) {
            const auto target = edge.first;

            // An edge pointing past the end can never be chosen.
            if (target > total) {
                continue;
            }

            double score = dictTrie_->GetMinWeight();

            if (nullptr != edge.second) {
                score = edge.second->weight;
            }

            if (target < total) {
                score += dags[target].max_weight;
            }

            if (score > node.max_weight) {
                node.max_weight = score;
                node.max_next = target;
            }
        }
    }
}
|
||||||
|
|
||||||
void CutByDag(RuneStrArray::const_iterator begin,
|
void CutByDag(RuneStrArray::const_iterator begin,
|
||||||
RuneStrArray::const_iterator,
|
RuneStrArray::const_iterator,
|
||||||
|
|
|
@ -123,64 +123,76 @@ public:
|
||||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, unordered_map<string, KeyWord>& res, bool hmm,
|
||||||
size_t) const override {
|
size_t) const override {
|
||||||
vector<WordRange> words;
|
vector<WordRange> words;
|
||||||
assert(end >= begin);
|
|
||||||
words.reserve(end - begin);
|
|
||||||
mpSeg_.CutRuneArray(begin, end, words);
|
|
||||||
|
|
||||||
vector<WordRange> hmmRes;
|
vector<WordRange> hmmRes;
|
||||||
hmmRes.reserve(end - begin);
|
assert(end >= begin);
|
||||||
|
if (3 == begin->len or 4 == begin->len) {
|
||||||
|
words.reserve(end - begin);
|
||||||
|
mpSeg_.CutRuneArray(begin, end, words);
|
||||||
|
hmmRes.reserve(words.size());
|
||||||
|
} else {
|
||||||
|
hmmRes.reserve(end - begin);
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < words.size(); i++) {
|
if (words.size() != 0) {//存在中文分词结果
|
||||||
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
|
|
||||||
string str = GetStringFromRunes(s, words[i].left, words[i].right);
|
string str = GetStringFromRunes(s, words[i].left, words[i].right);
|
||||||
|
|
||||||
if (stopWords_.find(str) != stopWords_.end()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (words[i].left != words[i].right) {
|
|
||||||
res[str].offsets.push_back(words[i].left->offset);
|
|
||||||
res[str].weight += 1.0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|
|
||||||
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
|
|
||||||
if (stopWords_.find(str) != stopWords_.end()) {
|
if (stopWords_.find(str) != stopWords_.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[str].offsets.push_back(words[i].left->offset);
|
|
||||||
res[str].weight += 1.0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if mp Get a single one and it is not in userdict, collect it in sequence
|
if (words[i].left != words[i].right) {
|
||||||
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符
|
res[str].offsets.push_back(words[i].left->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
while (j < (words.size() - 1) && words[j].left == words[j].right &&
|
|
||||||
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cut the sequence with hmm
|
|
||||||
assert(j - 1 >= i);
|
|
||||||
// TODO
|
|
||||||
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
|
||||||
|
|
||||||
//put hmm result to result
|
|
||||||
for (size_t k = 0; k < hmmRes.size(); k++) {
|
|
||||||
string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
|
|
||||||
if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
|
if (mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune)
|
||||||
res[hmmStr].weight += 1.0;
|
|| i == (words.size() - 1)) {//i++后如果是最后一个字符则直接push_back
|
||||||
|
if (stopWords_.find(str) != stopWords_.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[str].offsets.push_back(words[i].left->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||||
|
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里(i字符不是最后一个字符),直接判定j字符
|
||||||
|
|
||||||
|
while (j < (words.size() - 1)
|
||||||
|
&& words[j].left == words[j].right
|
||||||
|
&& !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cut the sequence with hmm
|
||||||
|
assert(j - 1 >= i);
|
||||||
|
// TODO
|
||||||
|
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||||
|
|
||||||
|
//put hmm result to result
|
||||||
|
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||||
|
string hmmStr = GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right);
|
||||||
|
if (IsSingleWord(hmmStr) || stopWords_.find(hmmStr) != stopWords_.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
res[hmmStr].offsets.push_back(hmmRes[k].left->offset);
|
||||||
|
res[hmmStr].weight += 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//clear tmp vars
|
||||||
|
hmmRes.clear();
|
||||||
|
|
||||||
|
//let i jump over this piece
|
||||||
|
i = j - 1;
|
||||||
|
}
|
||||||
|
} else {//不存在中文分词结果
|
||||||
|
for (size_t i = 0; i < (size_t)(end - begin); i++) {
|
||||||
|
string str = s.substr((begin+i)->offset, (begin+i)->len);
|
||||||
|
res[str].offsets.push_back((begin+i)->offset);
|
||||||
|
res[str].weight += 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//clear tmp vars
|
|
||||||
hmmRes.clear();
|
|
||||||
|
|
||||||
//let i jump over this piece
|
|
||||||
i = j - 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,6 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
wordRange.left = cursor_;
|
wordRange.left = cursor_;
|
||||||
|
|
||||||
if (cursor_->rune == 0x20) {
|
if (cursor_->rune == 0x20) {
|
||||||
while (cursor_ != sentence_.end()) {
|
while (cursor_ != sentence_.end()) {
|
||||||
if (cursor_->rune != 0x20) {
|
if (cursor_->rune != 0x20) {
|
||||||
|
@ -72,6 +71,9 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int max_num = 0;
|
||||||
|
uint32_t utf8_num = cursor_->len;
|
||||||
|
|
||||||
while (cursor_ != sentence_.end()) {
|
while (cursor_ != sentence_.end()) {
|
||||||
if (cursor_->rune == 0x20) {
|
if (cursor_->rune == 0x20) {
|
||||||
if (wordRange.left == cursor_) {
|
if (wordRange.left == cursor_) {
|
||||||
|
@ -83,6 +85,11 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor_ ++;
|
cursor_ ++;
|
||||||
|
max_num++;
|
||||||
|
if (max_num >= 1024 or cursor_->len != utf8_num) { //todo 防止一次性传入过多字节,暂定限制为1024个字
|
||||||
|
wordRange.right = cursor_;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wordRange.right = sentence_.end();
|
wordRange.right = sentence_.end();
|
||||||
|
|
|
@ -97,24 +97,6 @@ inline RuneArray DecodeRunesInString(const string& s) {
|
||||||
|
|
||||||
//重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518
|
//重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518
|
||||||
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
||||||
/*
|
|
||||||
RuneArray arr;
|
|
||||||
|
|
||||||
if (not DecodeRunesInString(s, arr)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
runes.clear();
|
|
||||||
|
|
||||||
uint32_t offset = 0;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < arr.size(); ++i) {
|
|
||||||
const uint32_t len = limonp::UnicodeToUtf8Bytes(arr[i]);
|
|
||||||
RuneInfo x(arr[i], offset, len, i, 1);
|
|
||||||
runes.push_back(x);
|
|
||||||
offset += len;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
uint32_t tmp;
|
uint32_t tmp;
|
||||||
uint32_t offset = 0;
|
uint32_t offset = 0;
|
||||||
|
|
|
@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD
|
||||||
|
|
||||||
HEADERS += \
|
HEADERS += \
|
||||||
$$PWD/DictTrie.hpp \
|
$$PWD/DictTrie.hpp \
|
||||||
|
$$PWD/IdfTrie.hpp \
|
||||||
$$PWD/FullSegment.hpp \
|
$$PWD/FullSegment.hpp \
|
||||||
$$PWD/HMMModel.hpp \
|
$$PWD/HMMModel.hpp \
|
||||||
$$PWD/HMMSegment.hpp \
|
$$PWD/HMMSegment.hpp \
|
||||||
|
@ -17,5 +18,4 @@ HEADERS += \
|
||||||
$$PWD/TextRankExtractor.hpp \
|
$$PWD/TextRankExtractor.hpp \
|
||||||
$$PWD/Trie.hpp \
|
$$PWD/Trie.hpp \
|
||||||
$$PWD/Unicode.hpp
|
$$PWD/Unicode.hpp
|
||||||
|
|
||||||
include(limonp/limonp.pri)
|
include(limonp/limonp.pri)
|
||||||
|
|
|
@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
||||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||||
include(cppjieba/cppjieba.pri)
|
include(cppjieba/cppjieba.pri)
|
||||||
|
|
||||||
|
#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
chinese-segmentation.cpp \
|
chinese-segmentation.cpp \
|
||||||
|
|
||||||
|
|
|
@ -228,10 +228,11 @@ void AppMatch::getDesktopFilePath() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
|
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
|
||||||
QMap<NameString, QStringList>::const_iterator i;
|
// QMap<NameString, QStringList>::const_iterator i;
|
||||||
for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
|
// for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
|
||||||
appNameMatch(i.key().app_name, installed);
|
// appNameMatch(i.key().app_name, installed);
|
||||||
}
|
// }
|
||||||
|
appNameMatch(installed);
|
||||||
qDebug() << "installed app match is successful!";
|
qDebug() << "installed app match is successful!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -276,6 +277,38 @@ void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &inst
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
 * Collects every installed application whose name matches m_sourceText.
 *
 * An application matches when, case-insensitively, m_sourceText is contained in
 *   - its display name, or
 *   - one of the pinyin-initials strings produced for the name, or
 *   - one of the full-pinyin strings produced for the name (only tried when
 *     the query is at least two characters long).
 *
 * @param installed  Receives one entry per matching application. The key
 *                   carries only app_name; the value is the entry's string
 *                   list copied from m_installAppMap.
 */
void AppMatch::appNameMatch(QMap<NameString, QStringList> &installed) {
    // Iterate over a snapshot so that inserting into `installed` can never
    // disturb the traversal.
    const QMap<NameString, QStringList> snapshot(m_installAppMap);
    NameString key;
    QStringList info;

    for(QMap<NameString, QStringList>::const_iterator it = snapshot.constBegin();
            it != snapshot.constEnd(); ++it) {
        const QString &appName = it.key().app_name;
        info = it.value();
        key.app_name = appName;

        // Direct hit on the display name: no need to look at pinyin at all.
        if(appName.contains(m_sourceText, Qt::CaseInsensitive)) {
            installed.insert(key, info);
            continue;
        }

        // The list is consumed pairwise: even index = full pinyin,
        // odd index = first letters (initials) of that reading.
        const QStringList readings = FileUtils::findMultiToneWords(appName);
        const int pairCount = readings.size() / 2;
        const bool queryLongEnough = m_sourceText.size() >= 2;

        for(int pair = 0; pair < pairCount; ++pair) {
            const QString &initials = readings.at(2 * pair + 1);
            const bool hit = initials.contains(m_sourceText, Qt::CaseInsensitive)
                             || (queryLongEnough
                                 && readings.at(2 * pair).contains(m_sourceText, Qt::CaseInsensitive));
            if(hit) {
                installed.insert(key, info);
                break;
            }
            // NOTE(review): for a one-character query the loop stops after the
            // first pair, so later readings' initials are never examined -
            // confirm this is intended.
            if(!queryLongEnough)
                break;
        }
    }
}
|
||||||
|
|
||||||
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
|
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
|
||||||
// if(m_interFace->timeout() != -1) {
|
// if(m_interFace->timeout() != -1) {
|
||||||
|
|
|
@ -65,6 +65,7 @@ private:
|
||||||
void getAppName(QMap<NameString, QStringList> &installed);
|
void getAppName(QMap<NameString, QStringList> &installed);
|
||||||
// void appNameMatch(QString appname,QString desktoppath,QString appicon);
|
// void appNameMatch(QString appname,QString desktoppath,QString appicon);
|
||||||
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
|
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
|
||||||
|
void appNameMatch(QMap<NameString, QStringList> &installed);
|
||||||
|
|
||||||
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);
|
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);
|
||||||
|
|
||||||
|
|
|
@ -179,6 +179,22 @@ QString FileUtils::getSettingName(const QString& setting) {
|
||||||
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
return setting.right(setting.length() - setting.lastIndexOf("/") - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether pathA is the same location as pathB or lies somewhere below it.
 *
 * The comparison is purely textual and component-aware: "/home/user2" is NOT
 * under "/home/user", while "/home/user/doc" is.
 *
 * Both arguments may be given without their leading "/" (block-list keys are
 * stored that way); the separator is added before comparing. Trailing
 * separators are ignored, so "/home/user/" and "/home/user" name the same
 * directory. An empty argument is therefore treated as the root directory.
 *
 * @param pathA  Path to test.
 * @param pathB  Candidate ancestor (or identical path).
 * @return true if pathA equals pathB or is located under pathB.
 */
bool FileUtils::isOrUnder(QString pathA, QString pathB)
{
    const QLatin1Char sep('/');

    // startsWith() is safe on an empty string, unlike indexing element 0.
    if(!pathA.startsWith(sep))
        pathA.prepend(sep);
    if(!pathB.startsWith(sep))
        pathB.prepend(sep);

    // Drop trailing separators but keep a lone "/" (the root) intact, so the
    // "pathB + '/'" prefix below never degenerates into "dir//".
    while(pathA.length() > 1 && pathA.endsWith(sep))
        pathA.chop(1);
    while(pathB.length() > 1 && pathB.endsWith(sep))
        pathB.chop(1);

    // Every absolute path is the root or lives under it.
    if(pathB.length() == 1)
        return true;

    if(pathA.length() < pathB.length())
        return false;

    if(pathA == pathB)
        return true;

    // Require a separator right after the prefix so that sibling names which
    // merely share leading characters do not match.
    QString prefix(pathB);
    prefix.append(sep);
    return pathA.startsWith(prefix);
}
|
||||||
|
|
||||||
|
|
||||||
void FileUtils::loadHanziTable(const QString &fileName) {
|
void FileUtils::loadHanziTable(const QString &fileName) {
|
||||||
QFile file(fileName);
|
QFile file(fileName);
|
||||||
|
@ -484,8 +500,10 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
||||||
if(!file.open(QuaZip::mdUnzip))
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
|
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
QuaZipFile fileR(&file);
|
QuaZipFile fileR(&file);
|
||||||
|
|
||||||
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
||||||
|
@ -546,8 +564,10 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
||||||
if(i.startsWith(prefix))
|
if(i.startsWith(prefix))
|
||||||
fileList << i;
|
fileList << i;
|
||||||
}
|
}
|
||||||
if(fileList.isEmpty())
|
if(fileList.isEmpty()) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for(int i = 0; i < fileList.size(); ++i){
|
for(int i = 0; i < fileList.size(); ++i){
|
||||||
QString name = prefix + QString::number(i + 1) + ".xml";
|
QString name = prefix + QString::number(i + 1) + ".xml";
|
||||||
|
@ -651,8 +671,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||||
if(!file.open(QuaZip::mdUnzip))
|
if(!file.open(QuaZip::mdUnzip))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
|
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
|
||||||
|
file.close();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
QuaZipFile fileR(&file);
|
QuaZipFile fileR(&file);
|
||||||
|
|
||||||
fileR.open(QIODevice::ReadOnly);
|
fileR.open(QIODevice::ReadOnly);
|
||||||
|
@ -707,8 +729,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||||
|
|
||||||
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
||||||
Poppler::Document *doc = Poppler::Document::load(path);
|
Poppler::Document *doc = Poppler::Document::load(path);
|
||||||
if(doc->isLocked())
|
if(doc->isLocked()) {
|
||||||
|
delete doc;
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
const QRectF qf;
|
const QRectF qf;
|
||||||
int pageNum = doc->numPages();
|
int pageNum = doc->numPages();
|
||||||
for(int i = 0; i < pageNum; ++i) {
|
for(int i = 0; i < pageNum; ++i) {
|
||||||
|
|
|
@ -67,6 +67,8 @@ public:
|
||||||
static QString getFileName(const QString &);
|
static QString getFileName(const QString &);
|
||||||
static QString getAppName(const QString &);
|
static QString getAppName(const QString &);
|
||||||
static QString getSettingName(const QString &);
|
static QString getSettingName(const QString &);
|
||||||
|
//A is or under B
|
||||||
|
static bool isOrUnder(QString pathA, QString pathB);
|
||||||
|
|
||||||
//chinese character to pinyin
|
//chinese character to pinyin
|
||||||
static QMap<QString, QStringList> map_chinese2pinyin;
|
static QMap<QString, QStringList> map_chinese2pinyin;
|
||||||
|
|
|
@ -148,7 +148,7 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// if(!path.startsWith("/home")) {
|
// if(!path.startsWith("/home")) {
|
||||||
//// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
|
// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
|
||||||
// returnCode = PATH_NOT_IN_HOME;
|
// returnCode = PATH_NOT_IN_HOME;
|
||||||
// return false;
|
// return false;
|
||||||
// }
|
// }
|
||||||
|
@ -156,15 +156,19 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
|
||||||
//why QSetting's key can't start with "/"??
|
//why QSetting's key can't start with "/"??
|
||||||
QString pathKey = path.right(path.length() - 1);
|
QString pathKey = path.right(path.length() - 1);
|
||||||
|
|
||||||
|
if (pathKey.endsWith(QLatin1Char('/'))) {
|
||||||
|
pathKey = pathKey.mid(0, pathKey.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
QStringList blockDirs = m_block_dirs_settings->allKeys();
|
QStringList blockDirs = m_block_dirs_settings->allKeys();
|
||||||
for(QString i : blockDirs) {
|
for(QString i : blockDirs) {
|
||||||
if(pathKey.startsWith(i)) {
|
if(FileUtils::isOrUnder(pathKey, i)) {
|
||||||
// returnCode = QString(tr("My parent folder has been blocked!"));
|
// returnCode = QString(tr("My parent folder has been blocked!"));
|
||||||
returnCode = PATH_PARENT_BLOCKED;
|
returnCode = PATH_PARENT_BLOCKED;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(i.startsWith(pathKey))
|
if(FileUtils::isOrUnder(i, pathKey))
|
||||||
m_block_dirs_settings->remove(i);
|
m_block_dirs_settings->remove(i);
|
||||||
}
|
}
|
||||||
m_block_dirs_settings->setValue(pathKey, "0");
|
m_block_dirs_settings->setValue(pathKey, "0");
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#include <QDBusInterface>
|
#include <QDBusInterface>
|
||||||
#include <QApplication>
|
#include <QApplication>
|
||||||
#include "libsearch_global.h"
|
#include "libsearch_global.h"
|
||||||
|
#include "file-utils.h"
|
||||||
|
|
||||||
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
|
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
|
||||||
#define TRANSPARENCY_KEY "transparency"
|
#define TRANSPARENCY_KEY "transparency"
|
||||||
|
|
|
@ -108,12 +108,14 @@ void ConstructDocumentForContent::run() {
|
||||||
FileReader::getTextContent(m_path, content);
|
FileReader::getTextContent(m_path, content);
|
||||||
if(content.isEmpty())
|
if(content.isEmpty())
|
||||||
return;
|
return;
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
//QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
//QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(content);
|
doc.setData(content);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
//doc.setUniqueTerm(uniqueterm);
|
||||||
doc.addTerm(upTerm);
|
doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
|
||||||
|
//doc.addTerm(upTerm);
|
||||||
|
doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||||
doc.addValue(m_path);
|
doc.addValue(m_path);
|
||||||
|
|
||||||
//'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
|
//'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
|
||||||
|
@ -131,6 +133,7 @@ void ConstructDocumentForContent::run() {
|
||||||
IndexGenerator::_mutex_doc_list_content.unlock();
|
IndexGenerator::_mutex_doc_list_content.unlock();
|
||||||
content.clear();
|
content.clear();
|
||||||
content.squeeze();
|
content.squeeze();
|
||||||
|
|
||||||
term.clear();
|
term.clear();
|
||||||
term.shrink_to_fit();
|
term.shrink_to_fit();
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -37,7 +37,7 @@ void Document::addPosting(std::string term, QVector<size_t> offset, int weight)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::addPosting(std::string term, std::vector<size_t> offset, int weight) {
|
void Document::addPosting(std::string &term, std::vector<size_t> &offset, int weight) {
|
||||||
if(term == "")
|
if(term == "")
|
||||||
return;
|
return;
|
||||||
if(term.length() > 240)
|
if(term.length() > 240)
|
||||||
|
@ -63,6 +63,12 @@ void Document::addTerm(QString term) {
|
||||||
m_document.add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds a term to the wrapped Xapian document.
 *
 * @param term  Term to add; an empty string is ignored.
 */
void Document::addTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
    }
}
|
||||||
|
|
||||||
void Document::addValue(QString value) {
|
void Document::addValue(QString value) {
|
||||||
m_document.add_value(1, value.toStdString());
|
m_document.add_value(1, value.toStdString());
|
||||||
}
|
}
|
||||||
|
@ -73,12 +79,20 @@ void Document::setUniqueTerm(QString term) {
|
||||||
m_document.add_term(term.toStdString());
|
m_document.add_term(term.toStdString());
|
||||||
|
|
||||||
// m_unique_term = new QString(term);
|
// m_unique_term = new QString(term);
|
||||||
m_unique_term = std::move(term);
|
m_unique_term = std::move(term.toStdString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sets the document's unique term: adds it to the wrapped Xapian document and
 * stores it in m_unique_term.
 *
 * @param term  The unique term; an empty string is ignored and leaves both
 *              the document and the stored term untouched.
 */
void Document::setUniqueTerm(std::string term) {
    if(term.empty())
        return;
    m_document.add_term(term);
    // `term` is our own by-value copy and is not used afterwards, so hand its
    // buffer to the member instead of copying the string a second time.
    m_unique_term = std::move(term);
}
|
||||||
|
|
||||||
std::string Document::getUniqueTerm() {
|
std::string Document::getUniqueTerm() {
|
||||||
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
// qDebug()<<"m_unique_term!"<<*m_unique_term;
|
||||||
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
|
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
|
||||||
return m_unique_term.toStdString();
|
return m_unique_term;//.toStdString();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Document::setIndexText(QStringList indexText) {
|
void Document::setIndexText(QStringList indexText) {
|
||||||
|
|
|
@ -41,11 +41,13 @@ public:
|
||||||
}
|
}
|
||||||
void setData(QString &data);
|
void setData(QString &data);
|
||||||
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
||||||
void addPosting(std::string term, std::vector<size_t> offset, int weight = 1);
|
void addPosting(std::string &term, std::vector<size_t> &offset, int weight = 1);
|
||||||
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
||||||
void addTerm(QString term);
|
void addTerm(QString term);
|
||||||
|
void addTerm(std::string term);
|
||||||
void addValue(QString value);
|
void addValue(QString value);
|
||||||
void setUniqueTerm(QString term);
|
void setUniqueTerm(QString term);
|
||||||
|
void setUniqueTerm(std::string term);
|
||||||
std::string getUniqueTerm();
|
std::string getUniqueTerm();
|
||||||
void setIndexText(QStringList indexText);
|
void setIndexText(QStringList indexText);
|
||||||
QStringList getIndexText();
|
QStringList getIndexText();
|
||||||
|
@ -53,7 +55,8 @@ public:
|
||||||
private:
|
private:
|
||||||
Xapian::Document m_document;
|
Xapian::Document m_document;
|
||||||
QStringList m_index_text;
|
QStringList m_index_text;
|
||||||
QString m_unique_term;
|
//QString m_unique_term;
|
||||||
|
std::string m_unique_term;
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,19 +97,6 @@ void FirstIndex::run() {
|
||||||
QTime t1 = QTime::currentTime();
|
QTime t1 = QTime::currentTime();
|
||||||
|
|
||||||
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
|
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
|
||||||
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
|
|
||||||
if(!fifoDir.exists())
|
|
||||||
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
|
|
||||||
|
|
||||||
unlink(UKUI_SEARCH_PIPE_PATH);
|
|
||||||
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
|
|
||||||
if(retval == -1) {
|
|
||||||
qCritical() << "creat fifo error!!";
|
|
||||||
syslog(LOG_ERR, "creat fifo error!!\n");
|
|
||||||
assert(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
qDebug() << "create fifo success\n";
|
|
||||||
|
|
||||||
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
|
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
|
||||||
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
|
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
|
||||||
|
@ -154,7 +141,6 @@ void FirstIndex::run() {
|
||||||
|
|
||||||
|
|
||||||
++FileUtils::_index_status;
|
++FileUtils::_index_status;
|
||||||
|
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
pid = fork();
|
pid = fork();
|
||||||
if(pid == 0) {
|
if(pid == 0) {
|
||||||
|
@ -240,6 +226,7 @@ void FirstIndex::run() {
|
||||||
qDebug() << "content index end;";
|
qDebug() << "content index end;";
|
||||||
sem.release(2);
|
sem.release(2);
|
||||||
});
|
});
|
||||||
|
|
||||||
mutex1.lock();
|
mutex1.lock();
|
||||||
mutex2.lock();
|
mutex2.lock();
|
||||||
mutex3.lock();
|
mutex3.lock();
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
#include "index-generator.h"
|
#include "index-generator.h"
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
#include <QStandardPaths>
|
#include <QStandardPaths>
|
||||||
|
#include <malloc.h>
|
||||||
|
|
||||||
//#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
//#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||||
//#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
//#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||||
|
@ -129,11 +129,11 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
|
||||||
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
|
||||||
// FileUtils::_index_status &= ~0x2;
|
// FileUtils::_index_status &= ~0x2;
|
||||||
qDebug() << "finish creatAllIndex for content";
|
qDebug() << "finish creatAllIndex for content";
|
||||||
|
|
||||||
IndexGenerator::_doc_list_content.clear();
|
IndexGenerator::_doc_list_content.clear();
|
||||||
IndexGenerator::_doc_list_content.squeeze();
|
IndexGenerator::_doc_list_content.squeeze();
|
||||||
QVector<Document>().swap(IndexGenerator::_doc_list_content);
|
QVector<Document>().swap(IndexGenerator::_doc_list_content);
|
||||||
// delete _doc_list_content;
|
malloc_trim(0);
|
||||||
// _doc_list_content = nullptr;
|
|
||||||
}
|
}
|
||||||
Q_EMIT this->transactionFinished();
|
Q_EMIT this->transactionFinished();
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "inotify-watch.h"
|
#include "inotify-watch.h"
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
#include <errno.h>
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
static InotifyWatch* global_instance_InotifyWatch = nullptr;
|
static InotifyWatch* global_instance_InotifyWatch = nullptr;
|
||||||
|
|
||||||
|
@ -48,7 +49,7 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
|
||||||
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
||||||
// qDebug() << i.value();
|
// qDebug() << i.value();
|
||||||
// if(i.value().length() > path.length()) {
|
// if(i.value().length() > path.length()) {
|
||||||
if(i.value().startsWith(path)) {
|
if(FileUtils::isOrUnder(i.value(), path)) {
|
||||||
qDebug() << "remove path: " << i.value();
|
qDebug() << "remove path: " << i.value();
|
||||||
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
||||||
PendingFile f(i.value());
|
PendingFile f(i.value());
|
||||||
|
@ -64,8 +65,9 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
|
||||||
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
|
||||||
// qDebug() << i.value();
|
// qDebug() << i.value();
|
||||||
if(i.value().length() > path.length()) {
|
if(i.value().length() > path.length()) {
|
||||||
if(i.value().startsWith(path)) {
|
if(FileUtils::isOrUnder(i.value(), path)) {
|
||||||
qDebug() << "remove path: " << i.value();
|
// if(i.value().startsWith(path + "/")) {
|
||||||
|
// qDebug() << "remove path: " << i.value();
|
||||||
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
inotify_rm_watch(m_inotifyFd, currentPath.key(path));
|
||||||
currentPath.erase(i++);
|
currentPath.erase(i++);
|
||||||
} else {
|
} else {
|
||||||
|
@ -135,7 +137,17 @@ void InotifyWatch::run()
|
||||||
if (m_inotifyFd > 0) {
|
if (m_inotifyFd > 0) {
|
||||||
qDebug()<<"Inotify init success!";
|
qDebug()<<"Inotify init success!";
|
||||||
} else {
|
} else {
|
||||||
Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
|
qWarning() << "Inotify init fail! Now try add inotify_user_instances.";
|
||||||
|
UkuiSearchQDBus usQDBus;
|
||||||
|
usQDBus.addInotifyUserInstances(128);
|
||||||
|
m_inotifyFd = inotify_init();
|
||||||
|
if (m_inotifyFd > 0) {
|
||||||
|
qDebug()<<"Inotify init success!";
|
||||||
|
} else {
|
||||||
|
printf("errno=%d\n",errno);
|
||||||
|
printf("Mesg:%s\n",strerror(errno));
|
||||||
|
Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
// this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||||
|
@ -208,10 +220,12 @@ void InotifyWatch::run()
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
qDebug() << "Leave watch loop";
|
||||||
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
|
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
|
||||||
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
|
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
|
||||||
removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
|
removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
|
||||||
}
|
}
|
||||||
|
close(m_inotifyFd);
|
||||||
// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
|
// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
|
||||||
// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
|
// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
|
||||||
// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);
|
// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include "pending-file-queue.h"
|
#include "pending-file-queue.h"
|
||||||
|
#include "file-utils.h"
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
using namespace Zeeker;
|
using namespace Zeeker;
|
||||||
static PendingFileQueue *global_instance_pending_file_queue = nullptr;
|
static PendingFileQueue *global_instance_pending_file_queue = nullptr;
|
||||||
|
@ -88,7 +89,7 @@ void PendingFileQueue::enqueue(const PendingFile &file)
|
||||||
// Because our datebase need to delete those indexs one by one.
|
// Because our datebase need to delete those indexs one by one.
|
||||||
if(file.shouldRemoveIndex() && file.isDir()) {
|
if(file.shouldRemoveIndex() && file.isDir()) {
|
||||||
const auto keepFile = [&file](const PendingFile& pending) {
|
const auto keepFile = [&file](const PendingFile& pending) {
|
||||||
return (!pending.path().startsWith(file.path()) || pending.shouldRemoveIndex());
|
return (!FileUtils::isOrUnder(pending.path(), file.path()) || pending.shouldRemoveIndex());
|
||||||
};
|
};
|
||||||
const auto end = m_cache.end();
|
const auto end = m_cache.end();
|
||||||
const auto droppedFilesBegin = std::stable_partition(m_cache.begin(), end, keepFile);
|
const auto droppedFilesBegin = std::stable_partition(m_cache.begin(), end, keepFile);
|
||||||
|
|
|
@ -109,7 +109,7 @@ void SearchManager::onKeywordSearch(QString keyword, QQueue<QString> *searchResu
|
||||||
bool SearchManager::isBlocked(QString &path) {
|
bool SearchManager::isBlocked(QString &path) {
|
||||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||||
for(QString i : blockList) {
|
for(QString i : blockList) {
|
||||||
if(path.startsWith(i.prepend("/")))
|
if(FileUtils::isOrUnder(path, i))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -798,7 +798,7 @@ void DirectSearch::run() {
|
||||||
|
|
||||||
bool findIndex = false;
|
bool findIndex = false;
|
||||||
for (QString j : blockList) {
|
for (QString j : blockList) {
|
||||||
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
|
if (FileUtils::isOrUnder(i.absoluteFilePath(), j)) {
|
||||||
findIndex = true;
|
findIndex = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,21 @@ void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
|
||||||
qWarning("enum class error!!!\n");
|
qWarning("enum class error!!!\n");
|
||||||
}
|
}
|
||||||
if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) {
|
if(FileUtils::SearchMethod::INDEXSEARCH == sm && 0 == FileUtils::_index_status) {
|
||||||
|
|
||||||
|
// Create a FIFO under ~/.config/org.ukui/ukui-search; it is used to control the order in which the child processes run.
|
||||||
|
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
|
||||||
|
if(!fifoDir.exists())
|
||||||
|
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
|
||||||
|
|
||||||
|
unlink(UKUI_SEARCH_PIPE_PATH);
|
||||||
|
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
|
||||||
|
if(retval == -1) {
|
||||||
|
qCritical() << "creat fifo error!!";
|
||||||
|
syslog(LOG_ERR, "creat fifo error!!\n");
|
||||||
|
assert(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
qDebug() << "create fifo success\n";
|
||||||
qWarning() << "start first index";
|
qWarning() << "start first index";
|
||||||
m_fi.start();
|
m_fi.start();
|
||||||
qWarning() << "start inotify index";
|
qWarning() << "start inotify index";
|
||||||
|
|
|
@ -42,5 +42,15 @@ void UkuiSearchQDBus::setInotifyMaxUserWatches() {
|
||||||
// sysctl
|
// sysctl
|
||||||
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
|
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
|
||||||
// /etc/sysctl.conf
|
// /etc/sysctl.conf
|
||||||
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
|
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
|
||||||
|
}
|
||||||
|
|
||||||
|
int UkuiSearchQDBus::addInotifyUserInstances(int addNum)
|
||||||
|
{
|
||||||
|
QDBusReply<int> reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum);
|
||||||
|
if(reply.isValid()) {
|
||||||
|
qDebug() << "Set inotify_max_user_instances to" << reply.value();
|
||||||
|
} else {
|
||||||
|
qWarning() << "Call AddInotifyMaxUserInstance failed!";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,14 @@
|
||||||
#define UKUISEARCHQDBUS_H
|
#define UKUISEARCHQDBUS_H
|
||||||
|
|
||||||
#include <QDBusInterface>
|
#include <QDBusInterface>
|
||||||
|
#include <QDBusReply>
|
||||||
namespace Zeeker {
|
namespace Zeeker {
|
||||||
class UkuiSearchQDBus {
|
class UkuiSearchQDBus {
|
||||||
public:
|
public:
|
||||||
UkuiSearchQDBus();
|
UkuiSearchQDBus();
|
||||||
~UkuiSearchQDBus();
|
~UkuiSearchQDBus();
|
||||||
void setInotifyMaxUserWatches();
|
void setInotifyMaxUserWatches();
|
||||||
|
int addInotifyUserInstances(int addNum);
|
||||||
private:
|
private:
|
||||||
QDBusInterface* tmpSystemQDBusInterface;
|
QDBusInterface* tmpSystemQDBusInterface;
|
||||||
};
|
};
|
||||||
|
|
|
@ -33,7 +33,7 @@ include(plugininterface/plugin-interface.pri)
|
||||||
include(pluginmanage/plugin-manager.pri)
|
include(pluginmanage/plugin-manager.pri)
|
||||||
|
|
||||||
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
|
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
|
||||||
LIBS += -lxapian -lquazip5 -luchardet
|
LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
file-utils.cpp \
|
file-utils.cpp \
|
||||||
|
|
|
@ -730,7 +730,7 @@ void ContentWidget::onListViewRowChanged(SearchListView * listview, const int &t
|
||||||
if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) {
|
if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) {
|
||||||
m_detailView->isContent = true;
|
m_detailView->isContent = true;
|
||||||
m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword);
|
m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword);
|
||||||
} else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && listview->currentIndex().row() == listview->getLength() - 1) {
|
} else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && SearchItem::SearchType::Contents == m_bestList.at(listview->currentIndex().row()).first) {
|
||||||
m_detailView->setContent(m_bestContent, m_keyword);
|
m_detailView->setContent(m_bestContent, m_keyword);
|
||||||
m_detailView->isContent = true;
|
m_detailView->isContent = true;
|
||||||
m_detailView->setupWidget(SearchItem::SearchType::Contents, path);
|
m_detailView->setupWidget(SearchItem::SearchType::Contents, path);
|
||||||
|
|
|
@ -139,6 +139,8 @@ MainWindow::MainWindow(QWidget *parent) :
|
||||||
this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入
|
this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入
|
||||||
this->raise();
|
this->raise();
|
||||||
this->activateWindow();
|
this->activateWindow();
|
||||||
|
} else if(this->isVisible()&&!this->isActiveWindow()) {
|
||||||
|
this->activateWindow();
|
||||||
} else {
|
} else {
|
||||||
tryHideMainwindow();
|
tryHideMainwindow();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,3 +19,4 @@ src.depends = libsearch
|
||||||
|
|
||||||
CONFIG += ordered
|
CONFIG += ordered
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -102,6 +102,36 @@ QString SysdbusRegister::setInotifyMaxUserWatchesStep3() {
|
||||||
return QString(ba);
|
return QString(ba);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int SysdbusRegister::AddInotifyMaxUserInstance(int addNum)
|
||||||
|
{
|
||||||
|
QFile file("/proc/sys/fs/inotify/max_user_instances");
|
||||||
|
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
|
||||||
|
return -1;
|
||||||
|
QTextStream ts(&file);
|
||||||
|
QString s = ts.read(512);
|
||||||
|
int instances = s.toInt() + addNum;
|
||||||
|
|
||||||
|
QByteArray ba;
|
||||||
|
FILE * fp = NULL;
|
||||||
|
char cmd[128];
|
||||||
|
char buf[1024];
|
||||||
|
sprintf(cmd, "sysctl -w fs.inotify.max_user_instances=\"%d\"", instances);
|
||||||
|
if((fp = popen(cmd, "r")) != NULL) {
|
||||||
|
rewind(fp);
|
||||||
|
while(!feof(fp)) {
|
||||||
|
fgets(buf, sizeof(buf), fp);
|
||||||
|
ba.append(buf);
|
||||||
|
}
|
||||||
|
pclose(fp);
|
||||||
|
fp = NULL;
|
||||||
|
} else {
|
||||||
|
qWarning() << "popen open failed";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return instances;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
//The following example comes from control center
|
//The following example comes from control center
|
||||||
|
|
||||||
//void SysdbusRegister::setAutoLoginStatus(QString username) {
|
//void SysdbusRegister::setAutoLoginStatus(QString username) {
|
||||||
|
|
|
@ -52,6 +52,7 @@ public slots:
|
||||||
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1();
|
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1();
|
||||||
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2();
|
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2();
|
||||||
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3();
|
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3();
|
||||||
|
Q_SCRIPTABLE int AddInotifyMaxUserInstance(int addNum);
|
||||||
|
|
||||||
// // 设置免密登录状态
|
// // 设置免密登录状态
|
||||||
// Q_SCRIPTABLE void setNoPwdLoginStatus();
|
// Q_SCRIPTABLE void setNoPwdLoginStatus();
|
||||||
|
|
Loading…
Reference in New Issue