Merge branch '0520-dev-v4' into 'V4-dev'
merge from main and modified index path. See merge request kylin-desktop/ukui-search!20
This commit is contained in:
commit
f02e2775fa
|
@ -1,3 +1,28 @@
|
|||
ukui-search (0.4.0+0530) v101; urgency=medium
|
||||
|
||||
* Bug 57129
|
||||
* 任务 无
|
||||
|
||||
-- zhangpengfei <zhangpengfei@kylinos.cn> Sun, 30 May 2021 11:21:37 +0800
|
||||
|
||||
ukui-search (0.4.0+0520) v101; urgency=medium
|
||||
|
||||
* Bug 55034,55545,55326,55496
|
||||
* 任务29459
|
||||
* 其他改动:
|
||||
* Fix:Seletion is cleared when paletted changed.
|
||||
- 修复收到palettechanged信号时列表选中状态消失的问题。
|
||||
* Fix:Icon in detail view will not refresh when icon-theme changed.
|
||||
- 修复主题图标改变时详情页图标未更新的问题。
|
||||
* Fix:Index process crashed when parsing some wps templates.
|
||||
- 修复了解析某些wps模板文件时索引崩溃的问题。
|
||||
* Fix:Block list conf won't work.
|
||||
- 修复了在控制面板设置黑名单不能及时生效的问题。
|
||||
* Fix:Automatic completion won't work in next search after clicked actions in detial widget.
|
||||
- 修复了点击详情页不会保存搜索历史记录的bug。
|
||||
|
||||
-- zhangpengfei <zhangpengfei@kylinos.cn> Thu, 20 May 2021 09:08:15 +0800
|
||||
|
||||
ukui-search (0.4.0+0508) v101; urgency=medium
|
||||
|
||||
* Bug 49153.
|
||||
|
|
|
@ -30,12 +30,12 @@ ChineseSegmentation::ChineseSegmentation() {
|
|||
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
|
||||
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
||||
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
||||
|
||||
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
||||
HMM_PATH,
|
||||
USER_DICT_PATH,
|
||||
IDF_PATH,
|
||||
STOP_WORD_PATH);
|
||||
STOP_WORD_PATH,
|
||||
"");
|
||||
}
|
||||
|
||||
ChineseSegmentation::~ChineseSegmentation() {
|
||||
|
@ -52,10 +52,10 @@ ChineseSegmentation *ChineseSegmentation::getInstance() {
|
|||
return global_instance_chinese_segmentation;
|
||||
}
|
||||
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str) {
|
||||
std::string s;
|
||||
s = str.toStdString();
|
||||
str.squeeze();
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
|
||||
// std::string s;
|
||||
// s = str.toStdString();
|
||||
// str.squeeze();
|
||||
|
||||
const size_t topk = -1;
|
||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||
|
@ -72,6 +72,15 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString str) {
|
|||
|
||||
}
|
||||
|
||||
std::vector<cppjieba::KeywordExtractor::Word> ChineseSegmentation::callSegementStd(const std::string &str) {
|
||||
|
||||
const size_t topk = -1;
|
||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||
ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
|
||||
|
||||
return keywordres;
|
||||
}
|
||||
|
||||
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw) {
|
||||
for(auto i : keywordres) {
|
||||
SKeyWord temp;
|
||||
|
|
|
@ -47,7 +47,10 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
|
|||
public:
|
||||
static ChineseSegmentation *getInstance();
|
||||
~ChineseSegmentation();
|
||||
QVector<SKeyWord> callSegement(QString str);
|
||||
QVector<SKeyWord> callSegement(std::string s);
|
||||
//新添加callSegementStd函数,修改返回值为std::vector<cppjieba::KeywordExtractor::Word>并简化内部处理流程--jxx20210517
|
||||
//修改函数入参形式为引用,去掉Qstring与std::string转换代码--jxx20210519
|
||||
std::vector<cppjieba::KeywordExtractor::Word> callSegementStd(const std::string& str);
|
||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
|
||||
private:
|
||||
static QMutex m_mutex;
|
||||
|
|
|
@ -0,0 +1,286 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <QDebug>
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "limonp/Md5.hpp"
|
||||
#include "Unicode.hpp"
|
||||
#include "darts.h"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
using std::pair;
|
||||
|
||||
struct DatElement {
|
||||
string word;
|
||||
string tag;
|
||||
double weight = 0;
|
||||
|
||||
bool operator < (const DatElement & b) const {
|
||||
if (word == b.word) {
|
||||
return this->weight > b.weight;
|
||||
}
|
||||
|
||||
return this->word < b.word;
|
||||
}
|
||||
};
|
||||
|
||||
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
|
||||
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
|
||||
}
|
||||
|
||||
struct DatMemElem {
|
||||
double weight = 0.0;
|
||||
char tag[8] = {};
|
||||
|
||||
void SetTag(const string & str) {
|
||||
memset(&tag[0], 0, sizeof(tag));
|
||||
strncpy(&tag[0], str.c_str(), std::min(str.size(), sizeof(tag) - 1));
|
||||
}
|
||||
|
||||
string GetTag() const {
|
||||
return &tag[0];
|
||||
}
|
||||
};
|
||||
|
||||
inline std::ostream & operator << (std::ostream& os, const DatMemElem & elem) {
|
||||
return os << "/tag=" << elem.GetTag() << "/weight=" << elem.weight;
|
||||
}
|
||||
|
||||
struct DatDag {
|
||||
limonp::LocalVector<pair<size_t, const DatMemElem *> > nexts;
|
||||
double max_weight;
|
||||
int max_next;
|
||||
};
|
||||
|
||||
typedef Darts::DoubleArray JiebaDAT;
|
||||
|
||||
|
||||
struct CacheFileHeader {
|
||||
char md5_hex[32] = {};
|
||||
double min_weight = 0;
|
||||
uint32_t elements_num = 0;
|
||||
uint32_t dat_size = 0;
|
||||
};
|
||||
|
||||
static_assert(sizeof(DatMemElem) == 16, "DatMemElem length invalid");
|
||||
static_assert((sizeof(CacheFileHeader) % sizeof(DatMemElem)) == 0, "DatMemElem CacheFileHeader length equal");
|
||||
|
||||
|
||||
class DatTrie {
|
||||
public:
|
||||
DatTrie() {}
|
||||
~DatTrie() {
|
||||
::munmap(mmap_addr_, mmap_length_);
|
||||
mmap_addr_ = nullptr;
|
||||
mmap_length_ = 0;
|
||||
|
||||
::close(mmap_fd_);
|
||||
mmap_fd_ = -1;
|
||||
}
|
||||
|
||||
const DatMemElem * Find(const string & key) const {
|
||||
JiebaDAT::result_pair_type find_result;
|
||||
dat_.exactMatchSearch(key.c_str(), find_result);
|
||||
|
||||
if ((0 == find_result.length) || (find_result.value < 0) || (find_result.value >= elements_num_)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &elements_ptr_[ find_result.value ];
|
||||
}
|
||||
|
||||
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
|
||||
vector<struct DatDag>&res, size_t max_word_len) const {
|
||||
|
||||
res.clear();
|
||||
res.resize(end - begin);
|
||||
|
||||
string text_str;
|
||||
EncodeRunesToString(begin, end, text_str);
|
||||
|
||||
static const size_t max_num = 128;
|
||||
JiebaDAT::result_pair_type result_pairs[max_num] = {};
|
||||
|
||||
for (size_t i = 0, begin_pos = 0; i < size_t(end - begin); i++) {
|
||||
|
||||
std::size_t num_results = dat_.commonPrefixSearch(&text_str[begin_pos], &result_pairs[0], max_num);
|
||||
|
||||
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + 1, nullptr));
|
||||
|
||||
for (std::size_t idx = 0; idx < num_results; ++idx) {
|
||||
auto & match = result_pairs[idx];
|
||||
|
||||
if ((match.value < 0) || (match.value >= elements_num_)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto const char_num = Utf8CharNum(&text_str[begin_pos], match.length);
|
||||
|
||||
if (char_num > max_word_len) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto pValue = &elements_ptr_[match.value];
|
||||
|
||||
if (1 == char_num) {
|
||||
res[i].nexts[0].second = pValue;
|
||||
continue;
|
||||
}
|
||||
|
||||
res[i].nexts.push_back(pair<size_t, const DatMemElem *>(i + char_num, pValue));
|
||||
}
|
||||
|
||||
begin_pos += limonp::UnicodeToUtf8Bytes((begin + i)->rune);
|
||||
}
|
||||
}
|
||||
|
||||
double GetMinWeight() const {
|
||||
return min_weight_;
|
||||
}
|
||||
|
||||
void SetMinWeight(double d) {
|
||||
min_weight_ = d ;
|
||||
}
|
||||
|
||||
bool InitBuildDat(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
|
||||
BuildDatCache(elements, dat_cache_file, md5);
|
||||
return InitAttachDat(dat_cache_file, md5);
|
||||
}
|
||||
|
||||
bool InitAttachDat(const string & dat_cache_file, const string & md5) {
|
||||
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
|
||||
|
||||
if (mmap_fd_ < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
|
||||
assert(seek_off >= 0);
|
||||
mmap_length_ = seek_off;
|
||||
|
||||
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
|
||||
assert(MAP_FAILED != mmap_addr_);
|
||||
|
||||
assert(mmap_length_ >= sizeof(CacheFileHeader));
|
||||
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
|
||||
elements_num_ = header.elements_num;
|
||||
min_weight_ = header.min_weight;
|
||||
assert(sizeof(header.md5_hex) == md5.size());
|
||||
|
||||
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(DatMemElem) + header.dat_size * dat_.unit_size());
|
||||
elements_ptr_ = (const DatMemElem *)(mmap_addr_ + sizeof(header));
|
||||
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(DatMemElem) * elements_num_;
|
||||
dat_.set_array(dat_ptr, header.dat_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
|
||||
std::sort(elements.begin(), elements.end());
|
||||
|
||||
vector<const char*> keys_ptr_vec;
|
||||
vector<int> values_vec;
|
||||
vector<DatMemElem> mem_elem_vec;
|
||||
|
||||
keys_ptr_vec.reserve(elements.size());
|
||||
values_vec.reserve(elements.size());
|
||||
mem_elem_vec.reserve(elements.size());
|
||||
|
||||
CacheFileHeader header;
|
||||
header.min_weight = min_weight_;
|
||||
assert(sizeof(header.md5_hex) == md5.size());
|
||||
memcpy(&header.md5_hex[0], md5.c_str(), md5.size());
|
||||
|
||||
for (size_t i = 0; i < elements.size(); ++i) {
|
||||
keys_ptr_vec.push_back(elements[i].word.data());
|
||||
values_vec.push_back(i);
|
||||
mem_elem_vec.push_back(DatMemElem());
|
||||
auto & mem_elem = mem_elem_vec.back();
|
||||
mem_elem.weight = elements[i].weight;
|
||||
mem_elem.SetTag(elements[i].tag);
|
||||
}
|
||||
|
||||
auto const ret = dat_.build(keys_ptr_vec.size(), &keys_ptr_vec[0], NULL, &values_vec[0]);
|
||||
assert(0 == ret);
|
||||
header.elements_num = mem_elem_vec.size();
|
||||
header.dat_size = dat_.size();
|
||||
|
||||
{
|
||||
string tmp_filepath = string(dat_cache_file) + "_XXXXXX";
|
||||
::umask(S_IWGRP | S_IWOTH);
|
||||
//const int fd =::mkstemp(&tmp_filepath[0]);
|
||||
//原mkstemp用法有误,已修复--jxx20210519
|
||||
const int fd =::mkstemp((char *)tmp_filepath.data());
|
||||
qDebug() << "mkstemp error:" << errno << tmp_filepath.data();
|
||||
assert(fd >= 0);
|
||||
::fchmod(fd, 0644);
|
||||
|
||||
auto write_bytes = ::write(fd, (const char *)&header, sizeof(header));
|
||||
write_bytes += ::write(fd, (const char *)&mem_elem_vec[0], sizeof(mem_elem_vec[0]) * mem_elem_vec.size());
|
||||
write_bytes += ::write(fd, dat_.array(), dat_.total_size());
|
||||
|
||||
assert(write_bytes == sizeof(header) + mem_elem_vec.size() * sizeof(mem_elem_vec[0]) + dat_.total_size());
|
||||
::close(fd);
|
||||
|
||||
const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
|
||||
assert(0 == rename_ret);
|
||||
}
|
||||
}
|
||||
|
||||
DatTrie(const DatTrie &);
|
||||
DatTrie &operator=(const DatTrie &);
|
||||
|
||||
private:
|
||||
JiebaDAT dat_;
|
||||
const DatMemElem * elements_ptr_ = nullptr;
|
||||
size_t elements_num_ = 0;
|
||||
double min_weight_ = 0;
|
||||
|
||||
int mmap_fd_ = -1;
|
||||
size_t mmap_length_ = 0;
|
||||
char * mmap_addr_ = nullptr;
|
||||
};
|
||||
|
||||
|
||||
inline string CalcFileListMD5(const string & files_list, size_t & file_size_sum) {
|
||||
limonp::MD5 md5;
|
||||
|
||||
const auto files = limonp::Split(files_list, "|;");
|
||||
file_size_sum = 0;
|
||||
|
||||
for (auto const & local_path : files) {
|
||||
const int fd = ::open(local_path.c_str(), O_RDONLY);
|
||||
if( fd < 0){
|
||||
continue;
|
||||
}
|
||||
auto const len = ::lseek(fd, 0, SEEK_END);
|
||||
if (len > 0) {
|
||||
void * addr = ::mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
|
||||
assert(MAP_FAILED != addr);
|
||||
|
||||
md5.Update((unsigned char *) addr, len);
|
||||
file_size_sum += len;
|
||||
|
||||
::munmap(addr, len);
|
||||
}
|
||||
::close(fd);
|
||||
}
|
||||
|
||||
md5.Final();
|
||||
return string(md5.digestChars);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_DICT_TRIE_HPP
|
||||
#define CPPJIEBA_DICT_TRIE_HPP
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
@ -31,8 +12,8 @@
|
|||
#include "limonp/StringUtil.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "Unicode.hpp"
|
||||
#include "Trie.hpp"
|
||||
|
||||
#include "DatTrie.hpp"
|
||||
#include <QDebug>
|
||||
namespace cppjieba {
|
||||
|
||||
using namespace limonp;
|
||||
|
@ -50,58 +31,22 @@ public:
|
|||
WordWeightMax,
|
||||
}; // enum UserWordWeightOption
|
||||
|
||||
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||
Init(dict_path, user_dict_paths, user_word_weight_opt);
|
||||
DictTrie(const string& dict_path, const string& user_dict_paths = "", const string & dat_cache_path = "",
|
||||
UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||
Init(dict_path, user_dict_paths, dat_cache_path, user_word_weight_opt);
|
||||
}
|
||||
|
||||
~DictTrie() {
|
||||
delete trie_;
|
||||
}
|
||||
~DictTrie() {}
|
||||
|
||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
DictUnit node_info;
|
||||
if(!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
|
||||
return false;
|
||||
}
|
||||
active_node_infos_.push_back(node_info);
|
||||
trie_->InsertNode(node_info.word, &active_node_infos_.back());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
|
||||
DictUnit node_info;
|
||||
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
|
||||
if(!MakeNodeInfo(node_info, word, weight, tag)) {
|
||||
return false;
|
||||
}
|
||||
active_node_infos_.push_back(node_info);
|
||||
trie_->InsertNode(node_info.word, &active_node_infos_.back());
|
||||
return true;
|
||||
}
|
||||
|
||||
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
||||
return trie_->Find(begin, end);
|
||||
const DatMemElem* Find(const string & word) const {
|
||||
return dat_.Find(word);
|
||||
}
|
||||
|
||||
void Find(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<struct Dag>&res,
|
||||
vector<struct DatDag>&res,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
trie_->Find(begin, end, res, max_word_len);
|
||||
}
|
||||
|
||||
bool Find(const string& word) {
|
||||
const DictUnit *tmp = NULL;
|
||||
RuneStrArray runes;
|
||||
if(!DecodeRunesInString(word, runes)) {
|
||||
XLOG(ERROR) << "Decode failed.";
|
||||
}
|
||||
tmp = Find(runes.begin(), runes.end());
|
||||
if(tmp == NULL) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
dat_.Find(begin, end, res, max_word_len);
|
||||
}
|
||||
|
||||
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||
|
@ -109,182 +54,176 @@ public:
|
|||
}
|
||||
|
||||
double GetMinWeight() const {
|
||||
return min_weight_;
|
||||
return dat_.GetMinWeight();
|
||||
}
|
||||
|
||||
void InserUserDictNode(const string& line) {
|
||||
size_t GetTotalDictSize() const {
|
||||
return total_dict_size_;
|
||||
}
|
||||
|
||||
void InserUserDictNode(const string& line, bool saveNodeInfo = true) {
|
||||
vector<string> buf;
|
||||
DictUnit node_info;
|
||||
DatElement node_info;
|
||||
Split(line, buf, " ");
|
||||
if(buf.size() == 1) {
|
||||
MakeNodeInfo(node_info,
|
||||
buf[0],
|
||||
user_word_default_weight_,
|
||||
UNKNOWN_TAG);
|
||||
} else if(buf.size() == 2) {
|
||||
MakeNodeInfo(node_info,
|
||||
buf[0],
|
||||
user_word_default_weight_,
|
||||
buf[1]);
|
||||
} else if(buf.size() == 3) {
|
||||
int freq = atoi(buf[1].c_str());
|
||||
assert(freq_sum_ > 0.0);
|
||||
double weight = log(1.0 * freq / freq_sum_);
|
||||
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
|
||||
|
||||
if (buf.size() == 0) {
|
||||
return;
|
||||
}
|
||||
static_node_infos_.push_back(node_info);
|
||||
if(node_info.word.size() == 1) {
|
||||
user_dict_single_chinese_word_.insert(node_info.word[0]);
|
||||
|
||||
node_info.word = buf[0];
|
||||
node_info.weight = user_word_default_weight_;
|
||||
node_info.tag = UNKNOWN_TAG;
|
||||
|
||||
if (buf.size() == 2) {
|
||||
node_info.tag = buf[1];
|
||||
} else if (buf.size() == 3) {
|
||||
if (freq_sum_ > 0.0) {
|
||||
const int freq = atoi(buf[1].c_str());
|
||||
node_info.weight = log(1.0 * freq / freq_sum_);
|
||||
node_info.tag = buf[2];
|
||||
}
|
||||
}
|
||||
|
||||
if (saveNodeInfo) {
|
||||
static_node_infos_.push_back(node_info);
|
||||
}
|
||||
|
||||
if (Utf8CharNum(node_info.word) == 1) {
|
||||
RuneArray word;
|
||||
|
||||
if (DecodeRunesInString(node_info.word, word)) {
|
||||
user_dict_single_chinese_word_.insert(word[0]);
|
||||
} else {
|
||||
XLOG(ERROR) << "Decode " << node_info.word << " failed.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LoadUserDict(const vector<string>& buf) {
|
||||
for(size_t i = 0; i < buf.size(); i++) {
|
||||
InserUserDictNode(buf[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void LoadUserDict(const set<string>& buf) {
|
||||
std::set<string>::const_iterator iter;
|
||||
for(iter = buf.begin(); iter != buf.end(); iter++) {
|
||||
InserUserDictNode(*iter);
|
||||
}
|
||||
}
|
||||
|
||||
void LoadUserDict(const string& filePaths) {
|
||||
void LoadUserDict(const string& filePaths, bool saveNodeInfo = true) {
|
||||
vector<string> files = limonp::Split(filePaths, "|;");
|
||||
size_t lineno = 0;
|
||||
for(size_t i = 0; i < files.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
ifstream ifs(files[i].c_str());
|
||||
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
||||
string line;
|
||||
|
||||
for(; getline(ifs, line); lineno++) {
|
||||
if(line.size() == 0) {
|
||||
for (; getline(ifs, line);) {
|
||||
if (line.size() == 0) {
|
||||
continue;
|
||||
}
|
||||
InserUserDictNode(line);
|
||||
|
||||
InserUserDictNode(line, saveNodeInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
|
||||
LoadDict(dict_path);
|
||||
void Init(const string& dict_path, const string& user_dict_paths, string dat_cache_path,
|
||||
UserWordWeightOption user_word_weight_opt) {
|
||||
const auto dict_list = dict_path + "|" + user_dict_paths;
|
||||
size_t file_size_sum = 0;
|
||||
const string md5 = CalcFileListMD5(dict_list, file_size_sum);
|
||||
|
||||
if (dat_cache_path.empty()) {
|
||||
//未指定词库数据文件存储位置的默认存储在tmp目录下--jxx20200519
|
||||
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
|
||||
}
|
||||
QString path = QString::fromStdString(dat_cache_path);
|
||||
qDebug() << "#########path:" << path;
|
||||
if (dat_.InitAttachDat(dat_cache_path, md5)) {
|
||||
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
|
||||
total_dict_size_ = file_size_sum;
|
||||
return;
|
||||
}
|
||||
|
||||
LoadDefaultDict(dict_path);
|
||||
freq_sum_ = CalcFreqSum(static_node_infos_);
|
||||
CalculateWeight(static_node_infos_, freq_sum_);
|
||||
SetStaticWordWeights(user_word_weight_opt);
|
||||
double min_weight = 0;
|
||||
SetStaticWordWeights(user_word_weight_opt, min_weight);
|
||||
dat_.SetMinWeight(min_weight);
|
||||
|
||||
if(user_dict_paths.size()) {
|
||||
LoadUserDict(user_dict_paths);
|
||||
}
|
||||
Shrink(static_node_infos_);
|
||||
CreateTrie(static_node_infos_);
|
||||
LoadUserDict(user_dict_paths);
|
||||
const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
|
||||
assert(build_ret);
|
||||
total_dict_size_ = file_size_sum;
|
||||
vector<DatElement>().swap(static_node_infos_);
|
||||
}
|
||||
|
||||
void CreateTrie(const vector<DictUnit>& dictUnits) {
|
||||
assert(dictUnits.size());
|
||||
vector<Unicode> words;
|
||||
vector<const DictUnit*> valuePointers;
|
||||
for(size_t i = 0 ; i < dictUnits.size(); i ++) {
|
||||
words.push_back(dictUnits[i].word);
|
||||
valuePointers.push_back(&dictUnits[i]);
|
||||
}
|
||||
|
||||
trie_ = new Trie(words, valuePointers);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool MakeNodeInfo(DictUnit& node_info,
|
||||
const string& word,
|
||||
double weight,
|
||||
const string& tag) {
|
||||
if(!DecodeRunesInString(word, node_info.word)) {
|
||||
XLOG(ERROR) << "Decode " << word << " failed.";
|
||||
return false;
|
||||
}
|
||||
node_info.weight = weight;
|
||||
node_info.tag = tag;
|
||||
return true;
|
||||
}
|
||||
|
||||
void LoadDict(const string& filePath) {
|
||||
void LoadDefaultDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
||||
string line;
|
||||
vector<string> buf;
|
||||
|
||||
DictUnit node_info;
|
||||
for(size_t lineno = 0; getline(ifs, line); lineno++) {
|
||||
for (; getline(ifs, line);) {
|
||||
Split(line, buf, " ");
|
||||
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
|
||||
MakeNodeInfo(node_info,
|
||||
buf[0],
|
||||
atof(buf[1].c_str()),
|
||||
buf[2]);
|
||||
DatElement node_info;
|
||||
node_info.word = buf[0];
|
||||
node_info.weight = atof(buf[1].c_str());
|
||||
node_info.tag = buf[2];
|
||||
static_node_infos_.push_back(node_info);
|
||||
}
|
||||
}
|
||||
|
||||
static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
|
||||
static bool WeightCompare(const DatElement& lhs, const DatElement& rhs) {
|
||||
return lhs.weight < rhs.weight;
|
||||
}
|
||||
|
||||
void SetStaticWordWeights(UserWordWeightOption option) {
|
||||
void SetStaticWordWeights(UserWordWeightOption option, double & min_weight) {
|
||||
XCHECK(!static_node_infos_.empty());
|
||||
vector<DictUnit> x = static_node_infos_;
|
||||
vector<DatElement> x = static_node_infos_;
|
||||
sort(x.begin(), x.end(), WeightCompare);
|
||||
min_weight_ = x[0].weight;
|
||||
max_weight_ = x[x.size() - 1].weight;
|
||||
median_weight_ = x[x.size() / 2].weight;
|
||||
switch(option) {
|
||||
case WordWeightMin:
|
||||
user_word_default_weight_ = min_weight_;
|
||||
break;
|
||||
case WordWeightMedian:
|
||||
user_word_default_weight_ = median_weight_;
|
||||
break;
|
||||
default:
|
||||
user_word_default_weight_ = max_weight_;
|
||||
break;
|
||||
if(x.empty()){
|
||||
return;
|
||||
}
|
||||
min_weight = x[0].weight;
|
||||
const double max_weight_ = x[x.size() - 1].weight;
|
||||
const double median_weight_ = x[x.size() / 2].weight;
|
||||
|
||||
switch (option) {
|
||||
case WordWeightMin:
|
||||
user_word_default_weight_ = min_weight;
|
||||
break;
|
||||
|
||||
case WordWeightMedian:
|
||||
user_word_default_weight_ = median_weight_;
|
||||
break;
|
||||
|
||||
default:
|
||||
user_word_default_weight_ = max_weight_;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
double CalcFreqSum(const vector<DictUnit>& node_infos) const {
|
||||
double CalcFreqSum(const vector<DatElement>& node_infos) const {
|
||||
double sum = 0.0;
|
||||
for(size_t i = 0; i < node_infos.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||
sum += node_infos[i].weight;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
|
||||
assert(sum > 0.0);
|
||||
for(size_t i = 0; i < node_infos.size(); i++) {
|
||||
DictUnit& node_info = node_infos[i];
|
||||
void CalculateWeight(vector<DatElement>& node_infos, double sum) const {
|
||||
for (size_t i = 0; i < node_infos.size(); i++) {
|
||||
DatElement& node_info = node_infos[i];
|
||||
assert(node_info.weight > 0.0);
|
||||
node_info.weight = log(double(node_info.weight) / sum);
|
||||
}
|
||||
}
|
||||
|
||||
void Shrink(vector<DictUnit>& units) const {
|
||||
vector<DictUnit>(units.begin(), units.end()).swap(units);
|
||||
}
|
||||
|
||||
vector<DictUnit> static_node_infos_;
|
||||
deque<DictUnit> active_node_infos_; // must not be vector
|
||||
Trie * trie_;
|
||||
private:
|
||||
vector<DatElement> static_node_infos_;
|
||||
size_t total_dict_size_ = 0;
|
||||
DatTrie dat_;
|
||||
|
||||
double freq_sum_;
|
||||
double min_weight_;
|
||||
double max_weight_;
|
||||
double median_weight_;
|
||||
double user_word_default_weight_;
|
||||
unordered_set<Rune> user_dict_single_chinese_word_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_FULLSEGMENT_H
|
||||
#define CPPJIEBA_FULLSEGMENT_H
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
|
@ -30,82 +11,45 @@
|
|||
namespace cppjieba {
|
||||
class FullSegment: public SegmentBase {
|
||||
public:
|
||||
FullSegment(const string& dictPath) {
|
||||
dictTrie_ = new DictTrie(dictPath);
|
||||
isNeedDestroy_ = true;
|
||||
}
|
||||
FullSegment(const DictTrie* dictTrie)
|
||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
||||
: dictTrie_(dictTrie) {
|
||||
assert(dictTrie_);
|
||||
}
|
||||
~FullSegment() {
|
||||
if(isNeedDestroy_) {
|
||||
delete dictTrie_;
|
||||
}
|
||||
}
|
||||
void Cut(const string& sentence,
|
||||
vector<string>& words) const {
|
||||
vector<Word> tmp;
|
||||
Cut(sentence, tmp);
|
||||
GetStringsFromWords(tmp, words);
|
||||
}
|
||||
void Cut(const string& sentence,
|
||||
vector<Word>& words) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
while(pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, wrs);
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<WordRange>& res) const {
|
||||
// result of searching in trie tree
|
||||
LocalVector<pair<size_t, const DictUnit*> > tRes;
|
||||
~FullSegment() { }
|
||||
|
||||
// max index of res's words
|
||||
size_t maxIdx = 0;
|
||||
|
||||
// always equals to (uItr - begin)
|
||||
size_t uIdx = 0;
|
||||
|
||||
// tmp variables
|
||||
size_t wordLen = 0;
|
||||
virtual void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<WordRange>& res, bool, size_t) const override {
|
||||
assert(dictTrie_);
|
||||
vector<struct Dag> dags;
|
||||
vector<struct DatDag> dags;
|
||||
dictTrie_->Find(begin, end, dags);
|
||||
for(size_t i = 0; i < dags.size(); i++) {
|
||||
for(size_t j = 0; j < dags[i].nexts.size(); j++) {
|
||||
size_t nextoffset = dags[i].nexts[j].first;
|
||||
size_t max_word_end_pos = 0;
|
||||
|
||||
for (size_t i = 0; i < dags.size(); i++) {
|
||||
for (const auto & kv : dags[i].nexts) {
|
||||
const size_t nextoffset = kv.first - 1;
|
||||
assert(nextoffset < dags.size());
|
||||
const DictUnit* du = dags[i].nexts[j].second;
|
||||
if(du == NULL) {
|
||||
if(dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
||||
WordRange wr(begin + i, begin + nextoffset);
|
||||
res.push_back(wr);
|
||||
}
|
||||
} else {
|
||||
wordLen = du->word.size();
|
||||
if(wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
||||
WordRange wr(begin + i, begin + nextoffset);
|
||||
res.push_back(wr);
|
||||
}
|
||||
const auto wordLen = nextoffset - i + 1;
|
||||
const bool is_not_covered_single_word = ((dags[i].nexts.size() == 1) && (max_word_end_pos <= i));
|
||||
const bool is_oov = (nullptr == kv.second); //Out-of-Vocabulary
|
||||
|
||||
if ((is_not_covered_single_word) || ((not is_oov) && (wordLen >= 2))) {
|
||||
WordRange wr(begin + i, begin + nextoffset);
|
||||
res.push_back(wr);
|
||||
}
|
||||
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
|
||||
|
||||
max_word_end_pos = max(max_word_end_pos, nextoffset + 1);
|
||||
}
|
||||
uIdx++;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t) const override {
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
const DictTrie* dictTrie_;
|
||||
bool isNeedDestroy_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,26 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_HMMMODEL_H
|
||||
#define CPPJIEBA_HMMMODEL_H
|
||||
#pragma once
|
||||
|
||||
#include "limonp/StringUtil.hpp"
|
||||
#include "Trie.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
|
@ -59,16 +39,18 @@ struct HMMModel {
|
|||
XCHECK(GetLine(ifile, line));
|
||||
Split(line, tmp, " ");
|
||||
XCHECK(tmp.size() == STATUS_SUM);
|
||||
for(size_t j = 0; j < tmp.size(); j++) {
|
||||
|
||||
for (size_t j = 0; j < tmp.size(); j++) {
|
||||
startProb[j] = atof(tmp[j].c_str());
|
||||
}
|
||||
|
||||
//Load transProb
|
||||
for(size_t i = 0; i < STATUS_SUM; i++) {
|
||||
for (size_t i = 0; i < STATUS_SUM; i++) {
|
||||
XCHECK(GetLine(ifile, line));
|
||||
Split(line, tmp, " ");
|
||||
XCHECK(tmp.size() == STATUS_SUM);
|
||||
for(size_t j = 0; j < STATUS_SUM; j++) {
|
||||
|
||||
for (size_t j = 0; j < tmp.size(); j++) {
|
||||
transProb[i][j] = atof(tmp[j].c_str());
|
||||
}
|
||||
}
|
||||
|
@ -92,43 +74,55 @@ struct HMMModel {
|
|||
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
|
||||
double defVal)const {
|
||||
EmitProbMap::const_iterator cit = ptMp->find(key);
|
||||
if(cit == ptMp->end()) {
|
||||
|
||||
if (cit == ptMp->end()) {
|
||||
return defVal;
|
||||
}
|
||||
|
||||
return cit->second;
|
||||
}
|
||||
bool GetLine(ifstream& ifile, string& line) {
|
||||
while(getline(ifile, line)) {
|
||||
while (getline(ifile, line)) {
|
||||
Trim(line);
|
||||
if(line.empty()) {
|
||||
|
||||
if (line.empty()) {
|
||||
continue;
|
||||
}
|
||||
if(StartsWith(line, "#")) {
|
||||
|
||||
if (StartsWith(line, "#")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
|
||||
if(line.empty()) {
|
||||
if (line.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<string> tmp, tmp2;
|
||||
Unicode unicode;
|
||||
RuneArray unicode;
|
||||
Split(line, tmp, ",");
|
||||
for(size_t i = 0; i < tmp.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < tmp.size(); i++) {
|
||||
Split(tmp[i], tmp2, ":");
|
||||
if(2 != tmp2.size()) {
|
||||
|
||||
if (2 != tmp2.size()) {
|
||||
XLOG(ERROR) << "emitProb illegal.";
|
||||
return false;
|
||||
}
|
||||
if(!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
|
||||
|
||||
if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
|
||||
XLOG(ERROR) << "TransCode failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
mp[unicode[0]] = atof(tmp2[1].c_str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -144,4 +138,3 @@ struct HMMModel {
|
|||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIBEA_HMMSEGMENT_H
|
||||
#define CPPJIBEA_HMMSEGMENT_H
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
@ -29,58 +10,40 @@
|
|||
namespace cppjieba {
|
||||
class HMMSegment: public SegmentBase {
|
||||
public:
|
||||
HMMSegment(const string& filePath)
|
||||
: model_(new HMMModel(filePath)), isNeedDestroy_(true) {
|
||||
}
|
||||
HMMSegment(const HMMModel* model)
|
||||
: model_(model), isNeedDestroy_(false) {
|
||||
}
|
||||
~HMMSegment() {
|
||||
if(isNeedDestroy_) {
|
||||
delete model_;
|
||||
}
|
||||
: model_(model) {
|
||||
}
|
||||
~HMMSegment() { }
|
||||
|
||||
void Cut(const string& sentence,
|
||||
vector<string>& words) const {
|
||||
vector<Word> tmp;
|
||||
Cut(sentence, tmp);
|
||||
GetStringsFromWords(tmp, words);
|
||||
}
|
||||
void Cut(const string& sentence,
|
||||
vector<Word>& words) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
while(pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, wrs);
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
||||
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool,
|
||||
size_t) const override {
|
||||
RuneStrArray::const_iterator left = begin;
|
||||
RuneStrArray::const_iterator right = begin;
|
||||
while(right != end) {
|
||||
if(right->rune < 0x80) {
|
||||
if(left != right) {
|
||||
|
||||
while (right != end) {
|
||||
if (right->rune < 0x80) {
|
||||
if (left != right) {
|
||||
InternalCut(left, right, res);
|
||||
}
|
||||
|
||||
left = right;
|
||||
|
||||
do {
|
||||
right = SequentialLetterRule(left, end);
|
||||
if(right != left) {
|
||||
|
||||
if (right != left) {
|
||||
break;
|
||||
}
|
||||
|
||||
right = NumbersRule(left, end);
|
||||
if(right != left) {
|
||||
|
||||
if (right != left) {
|
||||
break;
|
||||
}
|
||||
|
||||
right ++;
|
||||
} while(false);
|
||||
} while (false);
|
||||
|
||||
WordRange wr(left, right - 1);
|
||||
res.push_back(wr);
|
||||
left = right;
|
||||
|
@ -88,45 +51,61 @@ public:
|
|||
right++;
|
||||
}
|
||||
}
|
||||
if(left != right) {
|
||||
|
||||
if (left != right) {
|
||||
InternalCut(left, right, res);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t) const override {
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
// sequential letters rule
|
||||
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
||||
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end) const {
|
||||
Rune x = begin->rune;
|
||||
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
|
||||
|
||||
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
|
||||
begin ++;
|
||||
} else {
|
||||
return begin;
|
||||
}
|
||||
while(begin != end) {
|
||||
|
||||
while (begin != end) {
|
||||
x = begin->rune;
|
||||
if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
|
||||
|
||||
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
|
||||
begin ++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return begin;
|
||||
}
|
||||
//
|
||||
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
||||
Rune x = begin->rune;
|
||||
if('0' <= x && x <= '9') {
|
||||
|
||||
if ('0' <= x && x <= '9') {
|
||||
begin ++;
|
||||
} else {
|
||||
return begin;
|
||||
}
|
||||
while(begin != end) {
|
||||
|
||||
while (begin != end) {
|
||||
x = begin->rune;
|
||||
if(('0' <= x && x <= '9') || x == '.') {
|
||||
|
||||
if (('0' <= x && x <= '9') || x == '.') {
|
||||
begin++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return begin;
|
||||
}
|
||||
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
||||
|
@ -135,8 +114,9 @@ private:
|
|||
|
||||
RuneStrArray::const_iterator left = begin;
|
||||
RuneStrArray::const_iterator right;
|
||||
for(size_t i = 0; i < status.size(); i++) {
|
||||
if(status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
||||
|
||||
for (size_t i = 0; i < status.size(); i++) {
|
||||
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
||||
right = begin + i + 1;
|
||||
WordRange wr(left, right - 1);
|
||||
res.push_back(wr);
|
||||
|
@ -159,23 +139,25 @@ private:
|
|||
vector<double> weight(XYSize);
|
||||
|
||||
//start
|
||||
for(size_t y = 0; y < Y; y++) {
|
||||
for (size_t y = 0; y < Y; y++) {
|
||||
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
|
||||
path[0 + y * X] = -1;
|
||||
}
|
||||
|
||||
double emitProb;
|
||||
|
||||
for(size_t x = 1; x < X; x++) {
|
||||
for(size_t y = 0; y < Y; y++) {
|
||||
for (size_t x = 1; x < X; x++) {
|
||||
for (size_t y = 0; y < Y; y++) {
|
||||
now = x + y * X;
|
||||
weight[now] = MIN_DOUBLE;
|
||||
path[now] = HMMModel::E; // warning
|
||||
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
|
||||
for(size_t preY = 0; preY < Y; preY++) {
|
||||
|
||||
for (size_t preY = 0; preY < Y; preY++) {
|
||||
old = x - 1 + preY * X;
|
||||
tmp = weight[old] + model_->transProb[preY][y] + emitProb;
|
||||
if(tmp > weight[now]) {
|
||||
|
||||
if (tmp > weight[now]) {
|
||||
weight[now] = tmp;
|
||||
path[now] = preY;
|
||||
}
|
||||
|
@ -186,23 +168,23 @@ private:
|
|||
endE = weight[X - 1 + HMMModel::E * X];
|
||||
endS = weight[X - 1 + HMMModel::S * X];
|
||||
stat = 0;
|
||||
if(endE >= endS) {
|
||||
|
||||
if (endE >= endS) {
|
||||
stat = HMMModel::E;
|
||||
} else {
|
||||
stat = HMMModel::S;
|
||||
}
|
||||
|
||||
status.resize(X);
|
||||
for(int x = X - 1 ; x >= 0; x--) {
|
||||
|
||||
for (int x = X - 1 ; x >= 0; x--) {
|
||||
status[x] = stat;
|
||||
stat = path[x + stat * X];
|
||||
}
|
||||
}
|
||||
|
||||
const HMMModel* model_;
|
||||
bool isNeedDestroy_;
|
||||
}; // class HMMSegment
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,24 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEAB_JIEBA_H
|
||||
#define CPPJIEAB_JIEBA_H
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "QuerySegment.hpp"
|
||||
#include "KeywordExtractor.hpp"
|
||||
|
||||
|
@ -29,56 +11,48 @@ public:
|
|||
Jieba(const string& dict_path,
|
||||
const string& model_path,
|
||||
const string& user_dict_path,
|
||||
const string& idfPath,
|
||||
const string& stopWordPath)
|
||||
: dict_trie_(dict_path, user_dict_path),
|
||||
const string& idfPath = "",
|
||||
const string& stopWordPath = "",
|
||||
const string& dat_cache_path = "")
|
||||
: dict_trie_(dict_path, user_dict_path, dat_cache_path),
|
||||
model_(model_path),
|
||||
mp_seg_(&dict_trie_),
|
||||
hmm_seg_(&model_),
|
||||
mix_seg_(&dict_trie_, &model_),
|
||||
full_seg_(&dict_trie_),
|
||||
query_seg_(&dict_trie_, &model_),
|
||||
extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
|
||||
|
||||
}
|
||||
~Jieba() {
|
||||
}
|
||||
|
||||
struct LocWord {
|
||||
string word;
|
||||
size_t begin;
|
||||
size_t end;
|
||||
}; // struct LocWord
|
||||
extractor(&dict_trie_, &model_, idfPath, stopWordPath){ }
|
||||
~Jieba() { }
|
||||
|
||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
mix_seg_.Cut(sentence, words, hmm);
|
||||
mix_seg_.CutToStr(sentence, words, hmm);
|
||||
}
|
||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||
mix_seg_.Cut(sentence, words, hmm);
|
||||
mix_seg_.CutToWord(sentence, words, hmm);
|
||||
}
|
||||
void CutAll(const string& sentence, vector<string>& words) const {
|
||||
full_seg_.Cut(sentence, words);
|
||||
full_seg_.CutToStr(sentence, words);
|
||||
}
|
||||
void CutAll(const string& sentence, vector<Word>& words) const {
|
||||
full_seg_.Cut(sentence, words);
|
||||
full_seg_.CutToWord(sentence, words);
|
||||
}
|
||||
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
query_seg_.Cut(sentence, words, hmm);
|
||||
query_seg_.CutToStr(sentence, words, hmm);
|
||||
}
|
||||
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||
query_seg_.Cut(sentence, words, hmm);
|
||||
query_seg_.CutToWord(sentence, words, hmm);
|
||||
}
|
||||
void CutHMM(const string& sentence, vector<string>& words) const {
|
||||
hmm_seg_.Cut(sentence, words);
|
||||
hmm_seg_.CutToStr(sentence, words);
|
||||
}
|
||||
void CutHMM(const string& sentence, vector<Word>& words) const {
|
||||
hmm_seg_.Cut(sentence, words);
|
||||
hmm_seg_.CutToWord(sentence, words);
|
||||
}
|
||||
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
||||
mp_seg_.Cut(sentence, words, max_word_len);
|
||||
mp_seg_.CutToStr(sentence, words, false, max_word_len);
|
||||
}
|
||||
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
|
||||
mp_seg_.Cut(sentence, words, max_word_len);
|
||||
mp_seg_.CutToWord(sentence, words, false, max_word_len);
|
||||
}
|
||||
|
||||
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
||||
|
@ -87,16 +61,8 @@ public:
|
|||
string LookupTag(const string &str) const {
|
||||
return mix_seg_.LookupTag(str);
|
||||
}
|
||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return dict_trie_.InsertUserWord(word, tag);
|
||||
}
|
||||
|
||||
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
|
||||
return dict_trie_.InsertUserWord(word, freq, tag);
|
||||
}
|
||||
|
||||
bool Find(const string& word) {
|
||||
return dict_trie_.Find(word);
|
||||
return nullptr != dict_trie_.Find(word);
|
||||
}
|
||||
|
||||
void ResetSeparators(const string& s) {
|
||||
|
@ -116,18 +82,6 @@ public:
|
|||
return &model_;
|
||||
}
|
||||
|
||||
void LoadUserDict(const vector<string>& buf) {
|
||||
dict_trie_.LoadUserDict(buf);
|
||||
}
|
||||
|
||||
void LoadUserDict(const set<string>& buf) {
|
||||
dict_trie_.LoadUserDict(buf);
|
||||
}
|
||||
|
||||
void LoadUserDict(const string& path) {
|
||||
dict_trie_.LoadUserDict(path);
|
||||
}
|
||||
|
||||
private:
|
||||
DictTrie dict_trie_;
|
||||
HMMModel model_;
|
||||
|
@ -145,4 +99,3 @@ public:
|
|||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEAB_JIEBA_H
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
|
@ -37,15 +18,6 @@ public:
|
|||
double weight;
|
||||
}; // struct Word
|
||||
|
||||
KeywordExtractor(const string& dictPath,
|
||||
const string& hmmFilePath,
|
||||
const string& idfPath,
|
||||
const string& stopWordPath,
|
||||
const string& userDict = "")
|
||||
: segment_(dictPath, hmmFilePath, userDict) {
|
||||
LoadIdfDict(idfPath);
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
KeywordExtractor(const DictTrie* dictTrie,
|
||||
const HMMModel* model,
|
||||
const string& idfPath,
|
||||
|
@ -60,7 +32,8 @@ public:
|
|||
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
for(size_t i = 0; i < topWords.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(topWords[i].word);
|
||||
}
|
||||
}
|
||||
|
@ -68,43 +41,52 @@ public:
|
|||
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
for(size_t i = 0; i < topWords.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
||||
}
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
|
||||
vector<string> words;
|
||||
segment_.Cut(sentence, words);
|
||||
segment_.CutToStr(sentence, words);//将字符串string分解为words放入vector
|
||||
|
||||
map<string, Word> wordmap;
|
||||
map<string, Word> wordmap;//插入字符串与Word的map,相同string统计词频叠加权重
|
||||
size_t offset = 0;
|
||||
for(size_t i = 0; i < words.size(); ++i) {
|
||||
|
||||
for (size_t i = 0; i < words.size(); ++i) {
|
||||
size_t t = offset;
|
||||
offset += words[i].size();
|
||||
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||
|
||||
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
wordmap[words[i]].offsets.push_back(t);
|
||||
wordmap[words[i]].weight += 1.0;
|
||||
}
|
||||
if(offset != sentence.size()) {
|
||||
|
||||
if (offset != sentence.size()) {
|
||||
XLOG(ERROR) << "words illegal";
|
||||
return;
|
||||
}
|
||||
|
||||
keywords.clear();
|
||||
keywords.reserve(wordmap.size());
|
||||
for(map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
|
||||
if(cit != idfMap_.end()) {
|
||||
|
||||
for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);//IDF词典查找
|
||||
|
||||
if (cit != idfMap_.end()) {
|
||||
itr->second.weight *= cit->second;
|
||||
} else {
|
||||
itr->second.weight *= idfAverage_;
|
||||
}
|
||||
|
||||
itr->second.word = itr->first;
|
||||
keywords.push_back(itr->second);
|
||||
}
|
||||
|
||||
topN = min(topN, keywords.size());
|
||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||
keywords.resize(topN);
|
||||
|
@ -112,23 +94,31 @@ public:
|
|||
private:
|
||||
void LoadIdfDict(const string& idfPath) {
|
||||
ifstream ifs(idfPath.c_str());
|
||||
if(not ifs.is_open()){
|
||||
return ;
|
||||
}
|
||||
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
|
||||
string line ;
|
||||
vector<string> buf;
|
||||
double idf = 0.0;
|
||||
double idfSum = 0.0;
|
||||
size_t lineno = 0;
|
||||
for(; getline(ifs, line); lineno++) {
|
||||
|
||||
for (; getline(ifs, line); lineno++) {
|
||||
buf.clear();
|
||||
if(line.empty()) {
|
||||
|
||||
if (line.empty()) {
|
||||
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
||||
continue;
|
||||
}
|
||||
|
||||
Split(line, buf, " ");
|
||||
if(buf.size() != 2) {
|
||||
|
||||
if (buf.size() != 2) {
|
||||
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
|
||||
continue;
|
||||
}
|
||||
|
||||
idf = atof(buf[1].c_str());
|
||||
idfMap_[buf[0]] = idf;
|
||||
idfSum += idf;
|
||||
|
@ -141,11 +131,16 @@ private:
|
|||
}
|
||||
void LoadStopWordDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if(not ifs.is_open()){
|
||||
return ;
|
||||
}
|
||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||
string line ;
|
||||
while(getline(ifs, line)) {
|
||||
|
||||
while (getline(ifs, line)) {
|
||||
stopWords_.insert(line);
|
||||
}
|
||||
|
||||
assert(stopWords_.size());
|
||||
}
|
||||
|
||||
|
@ -161,11 +156,11 @@ private:
|
|||
}; // class KeywordExtractor
|
||||
|
||||
inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
|
||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
|
||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
|
||||
"}";
|
||||
}
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_MPSEGMENT_H
|
||||
#define CPPJIEBA_MPSEGMENT_H
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
|
@ -31,63 +12,32 @@ namespace cppjieba {
|
|||
|
||||
class MPSegment: public SegmentTagged {
|
||||
public:
|
||||
MPSegment(const string& dictPath, const string& userDictPath = "")
|
||||
: dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
|
||||
}
|
||||
MPSegment(const DictTrie* dictTrie)
|
||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
||||
: dictTrie_(dictTrie) {
|
||||
assert(dictTrie_);
|
||||
}
|
||||
~MPSegment() {
|
||||
if(isNeedDestroy_) {
|
||||
delete dictTrie_;
|
||||
}
|
||||
~MPSegment() { }
|
||||
|
||||
virtual void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<WordRange>& words,
|
||||
bool, size_t max_word_len) const override {
|
||||
vector<DatDag> dags;
|
||||
dictTrie_->Find(begin, end, dags, max_word_len);//依据DAG词典生成DAG--jxx
|
||||
CalcDP(dags);//动态规划(Dynamic Programming,DP),根据DAG计算最优动态规划路径--jxx
|
||||
CutByDag(begin, end, dags, words);//依据DAG最优路径分词--jxx
|
||||
}
|
||||
|
||||
void Cut(const string& sentence, vector<string>& words) const {
|
||||
Cut(sentence, words, MAX_WORD_LENGTH);
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t) const override {
|
||||
|
||||
}
|
||||
|
||||
void Cut(const string& sentence,
|
||||
vector<string>& words,
|
||||
size_t max_word_len) const {
|
||||
vector<Word> tmp;
|
||||
Cut(sentence, tmp, max_word_len);
|
||||
GetStringsFromWords(tmp, words);
|
||||
}
|
||||
void Cut(const string& sentence,
|
||||
vector<Word>& words,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
while(pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, wrs, max_word_len);
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<WordRange>& words,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
vector<Dag> dags;
|
||||
dictTrie_->Find(begin,
|
||||
end,
|
||||
dags,
|
||||
max_word_len);
|
||||
CalcDP(dags);
|
||||
CutByDag(begin, end, dags, words);
|
||||
}
|
||||
|
||||
const DictTrie* GetDictTrie() const {
|
||||
const DictTrie* GetDictTrie() const override {
|
||||
return dictTrie_;
|
||||
}
|
||||
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res) const {
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
|
||||
return tagger_.Tag(src, res, *this);
|
||||
}
|
||||
|
||||
|
@ -95,61 +45,50 @@ public:
|
|||
return dictTrie_->IsUserDictSingleChineseWord(value);
|
||||
}
|
||||
private:
|
||||
void CalcDP(vector<Dag>& dags) const {
|
||||
size_t nextPos;
|
||||
const DictUnit* p;
|
||||
double val;
|
||||
void CalcDP(vector<DatDag>& dags) const {
|
||||
for (auto rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
||||
rit->max_next = -1;
|
||||
rit->max_weight = MIN_DOUBLE;
|
||||
|
||||
for(vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
|
||||
rit->pInfo = NULL;
|
||||
rit->weight = MIN_DOUBLE;
|
||||
assert(!rit->nexts.empty());
|
||||
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
|
||||
nextPos = it->first;
|
||||
p = it->second;
|
||||
val = 0.0;
|
||||
if(nextPos + 1 < dags.size()) {
|
||||
val += dags[nextPos + 1].weight;
|
||||
for (const auto & it : rit->nexts) {
|
||||
const auto nextPos = it.first;
|
||||
double val = dictTrie_->GetMinWeight();
|
||||
|
||||
if (nullptr != it.second) {
|
||||
val = it.second->weight;
|
||||
}
|
||||
|
||||
if(p) {
|
||||
val += p->weight;
|
||||
} else {
|
||||
val += dictTrie_->GetMinWeight();
|
||||
if (nextPos < dags.size()) {
|
||||
val += dags[nextPos].max_weight;
|
||||
}
|
||||
if(val > rit->weight) {
|
||||
rit->pInfo = p;
|
||||
rit->weight = val;
|
||||
|
||||
if ((nextPos <= dags.size()) && (val > rit->max_weight)) {
|
||||
rit->max_weight = val;
|
||||
rit->max_next = nextPos;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CutByDag(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
const vector<Dag>& dags,
|
||||
RuneStrArray::const_iterator,
|
||||
const vector<DatDag>& dags,
|
||||
vector<WordRange>& words) const {
|
||||
size_t i = 0;
|
||||
while(i < dags.size()) {
|
||||
const DictUnit* p = dags[i].pInfo;
|
||||
if(p) {
|
||||
assert(p->word.size() >= 1);
|
||||
WordRange wr(begin + i, begin + i + p->word.size() - 1);
|
||||
words.push_back(wr);
|
||||
i += p->word.size();
|
||||
} else { //single chinese word
|
||||
WordRange wr(begin + i, begin + i);
|
||||
words.push_back(wr);
|
||||
i++;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < dags.size();) {
|
||||
const auto next = dags[i].max_next;
|
||||
assert(next > i);
|
||||
assert(next <= dags.size());
|
||||
WordRange wr(begin + i, begin + next - 1);
|
||||
words.push_back(wr);
|
||||
i = next;
|
||||
}
|
||||
}
|
||||
|
||||
const DictTrie* dictTrie_;
|
||||
bool isNeedDestroy_;
|
||||
PosTagger tagger_;
|
||||
|
||||
}; // class MPSegment
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_MIXSEGMENT_H
|
||||
#define CPPJIEBA_MIXSEGMENT_H
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include "MPSegment.hpp"
|
||||
|
@ -28,70 +9,49 @@
|
|||
namespace cppjieba {
|
||||
class MixSegment: public SegmentTagged {
|
||||
public:
|
||||
MixSegment(const string& mpSegDict, const string& hmmSegDict,
|
||||
const string& userDict = "")
|
||||
: mpSeg_(mpSegDict, userDict),
|
||||
hmmSeg_(hmmSegDict) {
|
||||
}
|
||||
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
|
||||
: mpSeg_(dictTrie), hmmSeg_(model) {
|
||||
}
|
||||
~MixSegment() {
|
||||
}
|
||||
~MixSegment() {}
|
||||
|
||||
void Cut(const string& sentence, vector<string>& words) const {
|
||||
Cut(sentence, words, true);
|
||||
}
|
||||
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
|
||||
vector<Word> tmp;
|
||||
Cut(sentence, tmp, hmm);
|
||||
GetStringsFromWords(tmp, words);
|
||||
}
|
||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
while(pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, wrs, hmm);
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||
if(!hmm) {
|
||||
mpSeg_.Cut(begin, end, res);
|
||||
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||
size_t) const override {
|
||||
if (!hmm) {
|
||||
mpSeg_.CutRuneArray(begin, end, res);
|
||||
return;
|
||||
}
|
||||
|
||||
vector<WordRange> words;
|
||||
assert(end >= begin);
|
||||
words.reserve(end - begin);
|
||||
mpSeg_.Cut(begin, end, words);
|
||||
mpSeg_.CutRuneArray(begin, end, words);
|
||||
|
||||
vector<WordRange> hmmRes;
|
||||
hmmRes.reserve(end - begin);
|
||||
for(size_t i = 0; i < words.size(); i++) {
|
||||
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
//if mp Get a word, it's ok, put it into result
|
||||
if(words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
|
||||
if (words[i].left != words[i].right || (words[i].left == words[i].right &&
|
||||
mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
|
||||
res.push_back(words[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||
size_t j = i;
|
||||
while(j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||
|
||||
while (j < words.size() && words[j].left == words[j].right &&
|
||||
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||
j++;
|
||||
}
|
||||
|
||||
// Cut the sequence with hmm
|
||||
assert(j - 1 >= i);
|
||||
// TODO
|
||||
hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||
|
||||
//put hmm result to result
|
||||
for(size_t k = 0; k < hmmRes.size(); k++) {
|
||||
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||
res.push_back(hmmRes[k]);
|
||||
}
|
||||
|
||||
|
@ -103,11 +63,61 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
const DictTrie* GetDictTrie() const {
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t) const override {
|
||||
//目前hmm默认开启,后期如有需要关闭再修改--jxx20210519
|
||||
// if (!hmm) {
|
||||
// mpSeg_.CutRuneArray(begin, end, res);
|
||||
// return;
|
||||
// }
|
||||
|
||||
vector<WordRange> words;
|
||||
assert(end >= begin);
|
||||
words.reserve(end - begin);
|
||||
mpSeg_.CutRuneArray(begin, end, words);
|
||||
|
||||
vector<WordRange> hmmRes;
|
||||
hmmRes.reserve(end - begin);
|
||||
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
//if mp Get a word, it's ok, put it into result
|
||||
if (words[i].left != words[i].right || (words[i].left == words[i].right &&
|
||||
mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
|
||||
res.push_back(GetStringFromRunes(s, words[i].left, words[i].right));
|
||||
continue;
|
||||
}
|
||||
|
||||
// if mp Get a single one and it is not in userdict, collect it in sequence
|
||||
size_t j = i;
|
||||
|
||||
while (j < words.size() && words[j].left == words[j].right &&
|
||||
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
|
||||
j++;
|
||||
}
|
||||
|
||||
// Cut the sequence with hmm
|
||||
assert(j - 1 >= i);
|
||||
// TODO
|
||||
hmmSeg_.CutRuneArray(words[i].left, words[j - 1].left + 1, hmmRes);
|
||||
|
||||
//put hmm result to result
|
||||
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||
res.push_back(GetStringFromRunes(s, hmmRes[k].left, hmmRes[k].right));
|
||||
}
|
||||
|
||||
//clear tmp vars
|
||||
hmmRes.clear();
|
||||
|
||||
//let i jump over this piece
|
||||
i = j - 1;
|
||||
}
|
||||
}
|
||||
|
||||
const DictTrie* GetDictTrie() const override {
|
||||
return mpSeg_.GetDictTrie();
|
||||
}
|
||||
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res) const {
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res) const override {
|
||||
return tagger_.Tag(src, res, *this);
|
||||
}
|
||||
|
||||
|
@ -124,4 +134,3 @@ private:
|
|||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,27 +1,8 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_POS_TAGGING_H
|
||||
#define CPPJIEBA_POS_TAGGING_H
|
||||
#pragma once
|
||||
|
||||
#include "limonp/StringUtil.hpp"
|
||||
#include "SegmentTagged.hpp"
|
||||
#include "DictTrie.hpp"
|
||||
#include "SegmentTagged.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
using namespace limonp;
|
||||
|
@ -39,28 +20,31 @@ public:
|
|||
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
|
||||
vector<string> CutRes;
|
||||
segment.Cut(src, CutRes);
|
||||
segment.CutToStr(src, CutRes);
|
||||
|
||||
for(vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
|
||||
}
|
||||
|
||||
return !res.empty();
|
||||
}
|
||||
|
||||
string LookupTag(const string &str, const SegmentTagged& segment) const {
|
||||
const DictUnit *tmp = NULL;
|
||||
RuneStrArray runes;
|
||||
const DictTrie * dict = segment.GetDictTrie();
|
||||
assert(dict != NULL);
|
||||
if(!DecodeRunesInString(str, runes)) {
|
||||
XLOG(ERROR) << "Decode failed.";
|
||||
return POS_X;
|
||||
}
|
||||
tmp = dict->Find(runes.begin(), runes.end());
|
||||
if(tmp == NULL || tmp->tag.empty()) {
|
||||
const auto tmp = dict->Find(str);
|
||||
|
||||
if (tmp == NULL || tmp->GetTag().empty()) {
|
||||
RuneStrArray runes;
|
||||
|
||||
if (!DecodeRunesInString(str, runes)) {
|
||||
XLOG(ERROR) << "Decode failed.";
|
||||
return POS_X;
|
||||
}
|
||||
|
||||
return SpecialRule(runes);
|
||||
} else {
|
||||
return tmp->tag;
|
||||
return tmp->GetTag();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,22 +52,27 @@ private:
|
|||
const char* SpecialRule(const RuneStrArray& unicode) const {
|
||||
size_t m = 0;
|
||||
size_t eng = 0;
|
||||
for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
|
||||
if(unicode[i].rune < 0x80) {
|
||||
|
||||
for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
|
||||
if (unicode[i].rune < 0x80) {
|
||||
eng ++;
|
||||
if('0' <= unicode[i].rune && unicode[i].rune <= '9') {
|
||||
|
||||
if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ascii char is not found
|
||||
if(eng == 0) {
|
||||
if (eng == 0) {
|
||||
return POS_X;
|
||||
}
|
||||
|
||||
// all the ascii is number char
|
||||
if(m == eng) {
|
||||
if (m == eng) {
|
||||
return POS_M;
|
||||
}
|
||||
|
||||
// the ascii chars contain english letter
|
||||
return POS_ENG;
|
||||
}
|
||||
|
@ -92,4 +81,3 @@ private:
|
|||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,43 +1,20 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_PRE_FILTER_H
|
||||
#define CPPJIEBA_PRE_FILTER_H
|
||||
#pragma once
|
||||
|
||||
#include "Trie.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include <unordered_set>
|
||||
#include "Unicode.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
class PreFilter {
|
||||
public:
|
||||
//TODO use WordRange instead of Range
|
||||
struct Range {
|
||||
RuneStrArray::const_iterator begin;
|
||||
RuneStrArray::const_iterator end;
|
||||
}; // struct Range
|
||||
|
||||
PreFilter(const unordered_set<Rune>& symbols,
|
||||
PreFilter(const std::unordered_set<Rune>& symbols,
|
||||
const string& sentence)
|
||||
: symbols_(symbols) {
|
||||
if(!DecodeRunesInString(sentence, sentence_)) {
|
||||
XLOG(ERROR) << "decode failed. ";
|
||||
if (!DecodeRunesInString(sentence, sentence_)) {
|
||||
XLOG(ERROR) << "decode failed. "<<sentence;
|
||||
}
|
||||
|
||||
cursor_ = sentence_.begin();
|
||||
}
|
||||
~PreFilter() {
|
||||
|
@ -45,28 +22,31 @@ public:
|
|||
bool HasNext() const {
|
||||
return cursor_ != sentence_.end();
|
||||
}
|
||||
Range Next() {
|
||||
Range range;
|
||||
range.begin = cursor_;
|
||||
while(cursor_ != sentence_.end()) {
|
||||
if(IsIn(symbols_, cursor_->rune)) {
|
||||
if(range.begin == cursor_) {
|
||||
WordRange Next() {
|
||||
WordRange range(cursor_, cursor_);
|
||||
|
||||
while (cursor_ != sentence_.end()) {
|
||||
//if (IsIn(symbols_, cursor_->rune)) {
|
||||
if (cursor_->rune == 0x20) {
|
||||
if (range.left == cursor_) {
|
||||
cursor_ ++;
|
||||
}
|
||||
range.end = cursor_;
|
||||
|
||||
range.right = cursor_;
|
||||
return range;
|
||||
}
|
||||
|
||||
cursor_ ++;
|
||||
}
|
||||
range.end = sentence_.end();
|
||||
|
||||
range.right = sentence_.end();
|
||||
return range;
|
||||
}
|
||||
private:
|
||||
RuneStrArray::const_iterator cursor_;
|
||||
RuneStrArray sentence_;
|
||||
const unordered_set<Rune>& symbols_;
|
||||
const std::unordered_set<Rune>& symbols_;
|
||||
}; // class PreFilter
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEBA_PRE_FILTER_H
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_QUERYSEGMENT_H
|
||||
#define CPPJIEBA_QUERYSEGMENT_H
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
|
@ -28,74 +9,65 @@
|
|||
#include "FullSegment.hpp"
|
||||
#include "MixSegment.hpp"
|
||||
#include "Unicode.hpp"
|
||||
#include "DictTrie.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
class QuerySegment: public SegmentBase {
|
||||
public:
|
||||
QuerySegment(const string& dict, const string& model, const string& userDict = "")
|
||||
: mixSeg_(dict, model, userDict),
|
||||
trie_(mixSeg_.GetDictTrie()) {
|
||||
}
|
||||
QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
|
||||
: mixSeg_(dictTrie, model), trie_(dictTrie) {
|
||||
}
|
||||
~QuerySegment() {
|
||||
}
|
||||
|
||||
void Cut(const string& sentence, vector<string>& words) const {
|
||||
Cut(sentence, words, true);
|
||||
}
|
||||
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
|
||||
vector<Word> tmp;
|
||||
Cut(sentence, tmp, hmm);
|
||||
GetStringsFromWords(tmp, words);
|
||||
}
|
||||
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
while(pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, wrs, hmm);
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||
size_t) const override {
|
||||
//use mix Cut first
|
||||
vector<WordRange> mixRes;
|
||||
mixSeg_.Cut(begin, end, mixRes, hmm);
|
||||
mixSeg_.CutRuneArray(begin, end, mixRes, hmm);
|
||||
|
||||
vector<WordRange> fullRes;
|
||||
for(vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
||||
if(mixResItr->Length() > 2) {
|
||||
for(size_t i = 0; i + 1 < mixResItr->Length(); i++) {
|
||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
|
||||
if(trie_->Find(wr.left, wr.right + 1) != NULL) {
|
||||
|
||||
for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
||||
if (mixResItr->Length() > 2) {
|
||||
for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
|
||||
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 2);
|
||||
|
||||
if (trie_->Find(text) != NULL) {
|
||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
|
||||
res.push_back(wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(mixResItr->Length() > 3) {
|
||||
for(size_t i = 0; i + 2 < mixResItr->Length(); i++) {
|
||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
|
||||
if(trie_->Find(wr.left, wr.right + 1) != NULL) {
|
||||
|
||||
if (mixResItr->Length() > 3) {
|
||||
for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
|
||||
string text = EncodeRunesToString(mixResItr->left + i, mixResItr->left + i + 3);
|
||||
|
||||
if (trie_->Find(text) != NULL) {
|
||||
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
|
||||
res.push_back(wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res.push_back(*mixResItr);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t) const override {
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
bool IsAllAscii(const Unicode& s) const {
|
||||
for(size_t i = 0; i < s.size(); i++) {
|
||||
if(s[i] >= 0x80) {
|
||||
bool IsAllAscii(const RuneArray& s) const {
|
||||
for (size_t i = 0; i < s.size(); i++) {
|
||||
if (s[i] >= 0x80) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
MixSegment mixSeg_;
|
||||
|
@ -104,4 +76,3 @@ private:
|
|||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_SEGMENTBASE_H
|
||||
#define CPPJIEBA_SEGMENTBASE_H
|
||||
#pragma once
|
||||
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "PreFilter.hpp"
|
||||
|
@ -35,24 +16,69 @@ public:
|
|||
SegmentBase() {
|
||||
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
|
||||
}
|
||||
virtual ~SegmentBase() {
|
||||
virtual ~SegmentBase() { }
|
||||
|
||||
virtual void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm,
|
||||
size_t max_word_len) const = 0;
|
||||
//添加基于sentence的cut方法,减少中间变量的存储与格式转换--jxx20210517
|
||||
virtual void CutWithSentence(const string& s, RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<string>& res, bool hmm,
|
||||
size_t max_word_len) const = 0;
|
||||
//重写CutToStr函数,简化获取vector<string>& words的流程,降低内存占用--jxx20210517
|
||||
void CutToStr(const string& sentence, vector<string>& words, bool hmm = true,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
/*
|
||||
vector<Word> tmp;
|
||||
CutToWord(sentence, tmp, hmm, max_word_len);
|
||||
GetStringsFromWords(tmp, words);
|
||||
*/
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
words.clear();
|
||||
words.reserve(sentence.size() / 2);//todo 参考源码,参数待定
|
||||
while (pre_filter.HasNext()) {
|
||||
auto range = pre_filter.Next();
|
||||
CutWithSentence(sentence, range.left, range.right, words, hmm, max_word_len);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Cut(const string& sentence, vector<string>& words) const = 0;
|
||||
void CutToWord(const string& sentence, vector<Word>& words, bool hmm = true,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
vector<WordRange> wrs;
|
||||
wrs.reserve(sentence.size() / 2);
|
||||
|
||||
while (pre_filter.HasNext()) {
|
||||
auto range = pre_filter.Next();
|
||||
Cut(range.left, range.right, wrs, hmm, max_word_len);
|
||||
}
|
||||
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetWordsFromWordRanges(sentence, wrs, words);
|
||||
wrs.clear();
|
||||
vector<WordRange>().swap(wrs);
|
||||
}
|
||||
|
||||
void CutRuneArray(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res,
|
||||
bool hmm = true, size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
Cut(begin, end, res, hmm, max_word_len);
|
||||
}
|
||||
|
||||
bool ResetSeparators(const string& s) {
|
||||
symbols_.clear();
|
||||
RuneStrArray runes;
|
||||
if(!DecodeRunesInString(s, runes)) {
|
||||
|
||||
if (!DecodeRunesInString(s, runes)) {
|
||||
XLOG(ERROR) << "decode " << s << " failed";
|
||||
return false;
|
||||
}
|
||||
for(size_t i = 0; i < runes.size(); i++) {
|
||||
if(!symbols_.insert(runes[i].rune).second) {
|
||||
|
||||
for (size_t i = 0; i < runes.size(); i++) {
|
||||
if (!symbols_.insert(runes[i].rune).second) {
|
||||
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
protected:
|
||||
|
@ -61,4 +87,3 @@ protected:
|
|||
|
||||
} // cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_SEGMENTTAGGED_H
|
||||
#define CPPJIEBA_SEGMENTTAGGED_H
|
||||
#pragma once
|
||||
|
||||
#include "SegmentBase.hpp"
|
||||
|
||||
|
@ -38,4 +19,3 @@ public:
|
|||
|
||||
} // cppjieba
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,212 +1,205 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
|
||||
#define CPPJIEBA_TEXTRANK_EXTRACTOR_H
|
||||
|
||||
#include <cmath>
|
||||
#include "Jieba.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
using namespace limonp;
|
||||
using namespace std;
|
||||
|
||||
class TextRankExtractor {
|
||||
public:
|
||||
typedef struct _Word {
|
||||
string word;
|
||||
vector<size_t> offsets;
|
||||
double weight;
|
||||
} Word; // struct Word
|
||||
private:
|
||||
typedef std::map<string, Word> WordMap;
|
||||
|
||||
class WordGraph {
|
||||
private:
|
||||
typedef double Score;
|
||||
typedef string Node;
|
||||
typedef std::set<Node> NodeSet;
|
||||
|
||||
typedef std::map<Node, double> Edges;
|
||||
typedef std::map<Node, Edges> Graph;
|
||||
//typedef std::unordered_map<Node,double> Edges;
|
||||
//typedef std::unordered_map<Node,Edges> Graph;
|
||||
|
||||
double d;
|
||||
Graph graph;
|
||||
NodeSet nodeSet;
|
||||
public:
|
||||
WordGraph(): d(0.85) {};
|
||||
WordGraph(double in_d): d(in_d) {};
|
||||
|
||||
void addEdge(Node start, Node end, double weight) {
|
||||
Edges temp;
|
||||
Edges::iterator gotEdges;
|
||||
nodeSet.insert(start);
|
||||
nodeSet.insert(end);
|
||||
graph[start][end] += weight;
|
||||
graph[end][start] += weight;
|
||||
}
|
||||
|
||||
void rank(WordMap &ws, size_t rankTime = 10) {
|
||||
WordMap outSum;
|
||||
Score wsdef, min_rank, max_rank;
|
||||
|
||||
if(graph.size() == 0)
|
||||
return;
|
||||
|
||||
wsdef = 1.0 / graph.size();
|
||||
|
||||
for(Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
|
||||
// edges->first start节点;edge->first end节点;edge->second 权重
|
||||
ws[edges->first].word = edges->first;
|
||||
ws[edges->first].weight = wsdef;
|
||||
outSum[edges->first].weight = 0;
|
||||
for(Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
|
||||
outSum[edges->first].weight += edge->second;
|
||||
}
|
||||
}
|
||||
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
|
||||
for(size_t i = 0; i < rankTime; i++) {
|
||||
for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
|
||||
double s = 0;
|
||||
for(Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
|
||||
// edge->first end节点;edge->second 权重
|
||||
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
|
||||
ws[*node].weight = (1 - d) + d * s;
|
||||
}
|
||||
}
|
||||
|
||||
min_rank = max_rank = ws.begin()->second.weight;
|
||||
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||
if(i->second.weight < min_rank) {
|
||||
min_rank = i->second.weight;
|
||||
}
|
||||
if(i->second.weight > max_rank) {
|
||||
max_rank = i->second.weight;
|
||||
}
|
||||
}
|
||||
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
TextRankExtractor(const string& dictPath,
|
||||
const string& hmmFilePath,
|
||||
const string& stopWordPath,
|
||||
const string& userDict = "")
|
||||
: segment_(dictPath, hmmFilePath, userDict) {
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
TextRankExtractor(const DictTrie* dictTrie,
|
||||
const HMMModel* model,
|
||||
const string& stopWordPath)
|
||||
: segment_(dictTrie, model) {
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
~TextRankExtractor() {
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
for(size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(topWords[i].word);
|
||||
}
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
for(size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
||||
}
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
|
||||
vector<string> words;
|
||||
segment_.Cut(sentence, words);
|
||||
|
||||
TextRankExtractor::WordGraph graph;
|
||||
WordMap wordmap;
|
||||
size_t offset = 0;
|
||||
|
||||
for(size_t i = 0; i < words.size(); i++) {
|
||||
size_t t = offset;
|
||||
offset += words[i].size();
|
||||
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||
continue;
|
||||
}
|
||||
for(size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
|
||||
if(IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
|
||||
skip++;
|
||||
continue;
|
||||
}
|
||||
graph.addEdge(words[i], words[j], 1);
|
||||
}
|
||||
wordmap[words[i]].offsets.push_back(t);
|
||||
}
|
||||
if(offset != sentence.size()) {
|
||||
XLOG(ERROR) << "words illegal";
|
||||
return;
|
||||
}
|
||||
|
||||
graph.rank(wordmap, rankTime);
|
||||
|
||||
keywords.clear();
|
||||
keywords.reserve(wordmap.size());
|
||||
for(WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||
keywords.push_back(itr->second);
|
||||
}
|
||||
|
||||
topN = min(topN, keywords.size());
|
||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||
keywords.resize(topN);
|
||||
}
|
||||
private:
|
||||
void LoadStopWordDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||
string line ;
|
||||
while(getline(ifs, line)) {
|
||||
stopWords_.insert(line);
|
||||
}
|
||||
assert(stopWords_.size());
|
||||
}
|
||||
|
||||
static bool Compare(const Word &x, const Word &y) {
|
||||
return x.weight > y.weight;
|
||||
}
|
||||
|
||||
MixSegment segment_;
|
||||
unordered_set<string> stopWords_;
|
||||
}; // class TextRankExtractor
|
||||
|
||||
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
|
||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
|
||||
}
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#include <cmath>
|
||||
#include "Jieba.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
using namespace limonp;
|
||||
using namespace std;
|
||||
|
||||
class TextRankExtractor {
|
||||
public:
|
||||
typedef struct _Word {
|
||||
string word;
|
||||
vector<size_t> offsets;
|
||||
double weight;
|
||||
} Word; // struct Word
|
||||
private:
|
||||
typedef std::map<string, Word> WordMap;
|
||||
|
||||
class WordGraph {
|
||||
private:
|
||||
typedef double Score;
|
||||
typedef string Node;
|
||||
typedef std::set<Node> NodeSet;
|
||||
|
||||
typedef std::map<Node, double> Edges;
|
||||
typedef std::map<Node, Edges> Graph;
|
||||
//typedef std::unordered_map<Node,double> Edges;
|
||||
//typedef std::unordered_map<Node,Edges> Graph;
|
||||
|
||||
double d;
|
||||
Graph graph;
|
||||
NodeSet nodeSet;
|
||||
public:
|
||||
WordGraph(): d(0.85) {};
|
||||
WordGraph(double in_d): d(in_d) {};
|
||||
|
||||
void addEdge(Node start, Node end, double weight) {
|
||||
Edges temp;
|
||||
Edges::iterator gotEdges;
|
||||
nodeSet.insert(start);
|
||||
nodeSet.insert(end);
|
||||
graph[start][end] += weight;
|
||||
graph[end][start] += weight;
|
||||
}
|
||||
|
||||
void rank(WordMap &ws, size_t rankTime = 10) {
|
||||
WordMap outSum;
|
||||
Score wsdef, min_rank, max_rank;
|
||||
|
||||
if (graph.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
wsdef = 1.0 / graph.size();
|
||||
|
||||
for (Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
|
||||
// edges->first start节点;edge->first end节点;edge->second 权重
|
||||
ws[edges->first].word = edges->first;
|
||||
ws[edges->first].weight = wsdef;
|
||||
outSum[edges->first].weight = 0;
|
||||
|
||||
for (Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
|
||||
outSum[edges->first].weight += edge->second;
|
||||
}
|
||||
}
|
||||
|
||||
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
|
||||
for (size_t i = 0; i < rankTime; i++) {
|
||||
for (NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++) {
|
||||
double s = 0;
|
||||
|
||||
for (Edges::iterator edge = graph[*node].begin(); edge != graph[*node].end(); edge++)
|
||||
// edge->first end节点;edge->second 权重
|
||||
{
|
||||
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
|
||||
}
|
||||
|
||||
ws[*node].weight = (1 - d) + d * s;
|
||||
}
|
||||
}
|
||||
|
||||
min_rank = max_rank = ws.begin()->second.weight;
|
||||
|
||||
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||
if (i->second.weight < min_rank) {
|
||||
min_rank = i->second.weight;
|
||||
}
|
||||
|
||||
if (i->second.weight > max_rank) {
|
||||
max_rank = i->second.weight;
|
||||
}
|
||||
}
|
||||
|
||||
for (WordMap::iterator i = ws.begin(); i != ws.end(); i ++) {
|
||||
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
TextRankExtractor(const DictTrie* dictTrie,
|
||||
const HMMModel* model,
|
||||
const string& stopWordPath)
|
||||
: segment_(dictTrie, model) {
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
~TextRankExtractor() {
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
|
||||
for (size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(topWords[i].word);
|
||||
}
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
vector<Word> topWords;
|
||||
Extract(sentence, topWords, topN);
|
||||
|
||||
for (size_t i = 0; i < topWords.size(); i++) {
|
||||
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
|
||||
}
|
||||
}
|
||||
|
||||
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
|
||||
vector<string> words;
|
||||
segment_.CutToStr(sentence, words);
|
||||
|
||||
TextRankExtractor::WordGraph graph;
|
||||
WordMap wordmap;
|
||||
size_t offset = 0;
|
||||
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
size_t t = offset;
|
||||
offset += words[i].size();
|
||||
|
||||
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
|
||||
if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
|
||||
skip++;
|
||||
continue;
|
||||
}
|
||||
|
||||
graph.addEdge(words[i], words[j], 1);
|
||||
}
|
||||
|
||||
wordmap[words[i]].offsets.push_back(t);
|
||||
}
|
||||
|
||||
if (offset != sentence.size()) {
|
||||
XLOG(ERROR) << "words illegal";
|
||||
return;
|
||||
}
|
||||
|
||||
graph.rank(wordmap, rankTime);
|
||||
|
||||
keywords.clear();
|
||||
keywords.reserve(wordmap.size());
|
||||
|
||||
for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
|
||||
keywords.push_back(itr->second);
|
||||
}
|
||||
|
||||
topN = min(topN, keywords.size());
|
||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||
keywords.resize(topN);
|
||||
}
|
||||
private:
|
||||
void LoadStopWordDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||
string line ;
|
||||
|
||||
while (getline(ifs, line)) {
|
||||
stopWords_.insert(line);
|
||||
}
|
||||
|
||||
assert(stopWords_.size());
|
||||
}
|
||||
|
||||
static bool Compare(const Word &x, const Word &y) {
|
||||
return x.weight > y.weight;
|
||||
}
|
||||
|
||||
MixSegment segment_;
|
||||
unordered_set<string> stopWords_;
|
||||
}; // class TextRankExtractor
|
||||
|
||||
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
|
||||
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight <<
|
||||
"}";
|
||||
}
|
||||
} // namespace cppjieba
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,192 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_TRIE_HPP
|
||||
#define CPPJIEBA_TRIE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "limonp/StdExtension.hpp"
|
||||
#include "Unicode.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
using namespace std;
|
||||
|
||||
const size_t MAX_WORD_LENGTH = 512;
|
||||
|
||||
struct DictUnit {
|
||||
Unicode word;
|
||||
double weight;
|
||||
string tag;
|
||||
}; // struct DictUnit
|
||||
|
||||
// for debugging
|
||||
// inline ostream & operator << (ostream& os, const DictUnit& unit) {
|
||||
// string s;
|
||||
// s << unit.word;
|
||||
// return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
||||
// }
|
||||
|
||||
struct Dag {
|
||||
RuneStr runestr;
|
||||
// [offset, nexts.first]
|
||||
limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
|
||||
const DictUnit * pInfo;
|
||||
double weight;
|
||||
size_t nextPos; // TODO
|
||||
Dag(): runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
|
||||
}
|
||||
}; // struct Dag
|
||||
|
||||
typedef Rune TrieKey;
|
||||
|
||||
class TrieNode {
|
||||
public :
|
||||
TrieNode(): next(NULL), ptValue(NULL) {
|
||||
}
|
||||
public:
|
||||
typedef unordered_map<TrieKey, TrieNode*> NextMap;
|
||||
NextMap *next;
|
||||
const DictUnit *ptValue;
|
||||
};
|
||||
|
||||
class Trie {
|
||||
public:
|
||||
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
|
||||
: root_(new TrieNode) {
|
||||
CreateTrie(keys, valuePointers);
|
||||
}
|
||||
~Trie() {
|
||||
DeleteNode(root_);
|
||||
}
|
||||
|
||||
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
|
||||
if(begin == end) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const TrieNode* ptNode = root_;
|
||||
TrieNode::NextMap::const_iterator citer;
|
||||
for(RuneStrArray::const_iterator it = begin; it != end; it++) {
|
||||
if(NULL == ptNode->next) {
|
||||
return NULL;
|
||||
}
|
||||
citer = ptNode->next->find(it->rune);
|
||||
if(ptNode->next->end() == citer) {
|
||||
return NULL;
|
||||
}
|
||||
ptNode = citer->second;
|
||||
}
|
||||
return ptNode->ptValue;
|
||||
}
|
||||
|
||||
void Find(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<struct Dag>&res,
|
||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||
assert(root_ != NULL);
|
||||
res.resize(end - begin);
|
||||
|
||||
const TrieNode *ptNode = NULL;
|
||||
TrieNode::NextMap::const_iterator citer;
|
||||
for(size_t i = 0; i < size_t(end - begin); i++) {
|
||||
res[i].runestr = *(begin + i);
|
||||
|
||||
if(root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
|
||||
ptNode = citer->second;
|
||||
} else {
|
||||
ptNode = NULL;
|
||||
}
|
||||
if(ptNode != NULL) {
|
||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));
|
||||
} else {
|
||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, static_cast<const DictUnit*>(NULL)));
|
||||
}
|
||||
|
||||
for(size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) {
|
||||
if(ptNode == NULL || ptNode->next == NULL) {
|
||||
break;
|
||||
}
|
||||
citer = ptNode->next->find((begin + j)->rune);
|
||||
if(ptNode->next->end() == citer) {
|
||||
break;
|
||||
}
|
||||
ptNode = citer->second;
|
||||
if(NULL != ptNode->ptValue) {
|
||||
res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
|
||||
if(key.begin() == key.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
TrieNode::NextMap::const_iterator kmIter;
|
||||
TrieNode *ptNode = root_;
|
||||
for(Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
|
||||
if(NULL == ptNode->next) {
|
||||
ptNode->next = new TrieNode::NextMap;
|
||||
}
|
||||
kmIter = ptNode->next->find(*citer);
|
||||
if(ptNode->next->end() == kmIter) {
|
||||
TrieNode *nextNode = new TrieNode;
|
||||
|
||||
ptNode->next->insert(make_pair(*citer, nextNode));
|
||||
ptNode = nextNode;
|
||||
} else {
|
||||
ptNode = kmIter->second;
|
||||
}
|
||||
}
|
||||
assert(ptNode != NULL);
|
||||
ptNode->ptValue = ptValue;
|
||||
}
|
||||
|
||||
private:
|
||||
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
||||
if(valuePointers.empty() || keys.empty()) {
|
||||
return;
|
||||
}
|
||||
assert(keys.size() == valuePointers.size());
|
||||
|
||||
for(size_t i = 0; i < keys.size(); i++) {
|
||||
InsertNode(keys[i], valuePointers[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void DeleteNode(TrieNode* node) {
|
||||
if(NULL == node) {
|
||||
return;
|
||||
}
|
||||
if(NULL != node->next) {
|
||||
for(TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
|
||||
DeleteNode(it->second);
|
||||
}
|
||||
delete node->next;
|
||||
}
|
||||
delete node;
|
||||
}
|
||||
|
||||
TrieNode* root_;
|
||||
}; // class Trie
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEBA_TRIE_HPP
|
|
@ -1,23 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef CPPJIEBA_UNICODE_H
|
||||
#define CPPJIEBA_UNICODE_H
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -25,6 +6,7 @@
|
|||
#include <vector>
|
||||
#include <ostream>
|
||||
#include "limonp/LocalVector.hpp"
|
||||
#include "limonp/StringUtil.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
|
@ -50,28 +32,28 @@ inline std::ostream& operator << (std::ostream& os, const Word& w) {
|
|||
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
|
||||
}
|
||||
|
||||
struct RuneStr {
|
||||
struct RuneInfo {
|
||||
Rune rune;
|
||||
uint32_t offset;
|
||||
uint32_t len;
|
||||
uint32_t unicode_offset;
|
||||
uint32_t unicode_length;
|
||||
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
|
||||
uint32_t unicode_offset = 0;
|
||||
uint32_t unicode_length = 0;
|
||||
RuneInfo(): rune(0), offset(0), len(0) {
|
||||
}
|
||||
RuneStr(Rune r, uint32_t o, uint32_t l)
|
||||
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
|
||||
RuneInfo(Rune r, uint32_t o, uint32_t l)
|
||||
: rune(r), offset(o), len(l) {
|
||||
}
|
||||
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
|
||||
RuneInfo(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
|
||||
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
|
||||
}
|
||||
}; // struct RuneStr
|
||||
}; // struct RuneInfo
|
||||
|
||||
inline std::ostream& operator << (std::ostream& os, const RuneStr& r) {
|
||||
inline std::ostream& operator << (std::ostream& os, const RuneInfo& r) {
|
||||
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
|
||||
}
|
||||
|
||||
typedef limonp::LocalVector<Rune> Unicode;
|
||||
typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
|
||||
typedef limonp::LocalVector<Rune> RuneArray;
|
||||
typedef limonp::LocalVector<struct RuneInfo> RuneStrArray;
|
||||
|
||||
// [left, right]
|
||||
struct WordRange {
|
||||
|
@ -83,127 +65,169 @@ struct WordRange {
|
|||
size_t Length() const {
|
||||
return right - left + 1;
|
||||
}
|
||||
|
||||
bool IsAllAscii() const {
|
||||
for(RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
|
||||
if(iter->rune >= 0x80) {
|
||||
for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
|
||||
if (iter->rune >= 0x80) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}; // struct WordRange
|
||||
|
||||
struct RuneStrLite {
|
||||
uint32_t rune;
|
||||
uint32_t len;
|
||||
RuneStrLite(): rune(0), len(0) {
|
||||
}
|
||||
RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) {
|
||||
}
|
||||
}; // struct RuneStrLite
|
||||
|
||||
inline RuneStrLite DecodeRuneInString(const char* str, size_t len) {
|
||||
RuneStrLite rp(0, 0);
|
||||
if(str == NULL || len == 0) {
|
||||
return rp;
|
||||
}
|
||||
if(!(str[0] & 0x80)) { // 0xxxxxxx
|
||||
// 7bit, total 7bit
|
||||
rp.rune = (uint8_t)(str[0]) & 0x7f;
|
||||
rp.len = 1;
|
||||
} else if((uint8_t)str[0] <= 0xdf && 1 < len) {
|
||||
// 110xxxxxx
|
||||
// 5bit, total 5bit
|
||||
rp.rune = (uint8_t)(str[0]) & 0x1f;
|
||||
|
||||
// 6bit, total 11bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
||||
rp.len = 2;
|
||||
} else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx
|
||||
// 4bit, total 4bit
|
||||
rp.rune = (uint8_t)(str[0]) & 0x0f;
|
||||
|
||||
// 6bit, total 10bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
||||
|
||||
// 6bit, total 16bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[2]) & 0x3f;
|
||||
|
||||
rp.len = 3;
|
||||
} else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx
|
||||
// 3bit, total 3bit
|
||||
rp.rune = (uint8_t)(str[0]) & 0x07;
|
||||
|
||||
// 6bit, total 9bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[1]) & 0x3f;
|
||||
|
||||
// 6bit, total 15bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[2]) & 0x3f;
|
||||
|
||||
// 6bit, total 21bit
|
||||
rp.rune <<= 6;
|
||||
rp.rune |= (uint8_t)(str[3]) & 0x3f;
|
||||
|
||||
rp.len = 4;
|
||||
} else {
|
||||
rp.rune = 0;
|
||||
rp.len = 0;
|
||||
}
|
||||
return rp;
|
||||
inline bool DecodeRunesInString(const string& s, RuneArray& arr) {
|
||||
arr.clear();
|
||||
return limonp::Utf8ToUnicode32(s, arr);
|
||||
}
|
||||
|
||||
inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
|
||||
runes.clear();
|
||||
runes.reserve(len / 2);
|
||||
for(uint32_t i = 0, j = 0; i < len;) {
|
||||
RuneStrLite rp = DecodeRuneInString(s + i, len - i);
|
||||
if(rp.len == 0) {
|
||||
runes.clear();
|
||||
return false;
|
||||
}
|
||||
RuneStr x(rp.rune, i, rp.len, j, 1);
|
||||
runes.push_back(x);
|
||||
i += rp.len;
|
||||
++j;
|
||||
}
|
||||
return true;
|
||||
inline RuneArray DecodeRunesInString(const string& s) {
|
||||
RuneArray result;
|
||||
DecodeRunesInString(s, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
//重写DecodeRunesInString函数,将实现放入函数中降低内存占用加快处理流程--jxx20210518
|
||||
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
||||
return DecodeRunesInString(s.c_str(), s.size(), runes);
|
||||
}
|
||||
/*
|
||||
RuneArray arr;
|
||||
|
||||
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
|
||||
unicode.clear();
|
||||
RuneStrArray runes;
|
||||
if(!DecodeRunesInString(s, len, runes)) {
|
||||
if (not DecodeRunesInString(s, arr)) {
|
||||
return false;
|
||||
}
|
||||
unicode.reserve(runes.size());
|
||||
for(size_t i = 0; i < runes.size(); i++) {
|
||||
unicode.push_back(runes[i].rune);
|
||||
|
||||
runes.clear();
|
||||
|
||||
uint32_t offset = 0;
|
||||
|
||||
for (uint32_t i = 0; i < arr.size(); ++i) {
|
||||
const uint32_t len = limonp::UnicodeToUtf8Bytes(arr[i]);
|
||||
RuneInfo x(arr[i], offset, len, i, 1);
|
||||
runes.push_back(x);
|
||||
offset += len;
|
||||
}
|
||||
*/
|
||||
|
||||
uint32_t tmp;
|
||||
uint32_t offset = 0;
|
||||
runes.clear();
|
||||
for(size_t i = 0; i < s.size();) {
|
||||
if(!(s.data()[i] & 0x80)) { // 0xxxxxxx
|
||||
// 7bit, total 7bit
|
||||
tmp = (uint8_t)(s.data()[i]) & 0x7f;
|
||||
i++;
|
||||
} else if ((uint8_t)s.data()[i] <= 0xdf && i + 1 < s.size()) { // 110xxxxxx
|
||||
// 5bit, total 5bit
|
||||
tmp = (uint8_t)(s.data()[i]) & 0x1f;
|
||||
|
||||
// 6bit, total 11bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||
i += 2;
|
||||
} else if((uint8_t)s.data()[i] <= 0xef && i + 2 < s.size()) { // 1110xxxxxx
|
||||
// 4bit, total 4bit
|
||||
tmp = (uint8_t)(s.data()[i]) & 0x0f;
|
||||
|
||||
// 6bit, total 10bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 16bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
|
||||
|
||||
i += 3;
|
||||
} else if((uint8_t)s.data()[i] <= 0xf7 && i + 3 < s.size()) { // 11110xxxx
|
||||
// 3bit, total 3bit
|
||||
tmp = (uint8_t)(s.data()[i]) & 0x07;
|
||||
|
||||
// 6bit, total 9bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 15bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+2]) & 0x3f;
|
||||
|
||||
// 6bit, total 21bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(s.data()[i+3]) & 0x3f;
|
||||
|
||||
i += 4;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
uint32_t len = limonp::UnicodeToUtf8Bytes(tmp);
|
||||
RuneInfo x(tmp, offset, len, i, 1);
|
||||
runes.push_back(x);
|
||||
offset += len;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
class RunePtrWrapper {
|
||||
public:
|
||||
const RuneInfo * m_ptr = nullptr;
|
||||
|
||||
public:
|
||||
explicit RunePtrWrapper(const RuneInfo * p) : m_ptr(p) {}
|
||||
|
||||
uint32_t operator *() {
|
||||
return m_ptr->rune;
|
||||
}
|
||||
|
||||
RunePtrWrapper operator ++(int) {
|
||||
m_ptr ++;
|
||||
return RunePtrWrapper(m_ptr);
|
||||
}
|
||||
|
||||
bool operator !=(const RunePtrWrapper & b) const {
|
||||
return this->m_ptr != b.m_ptr;
|
||||
}
|
||||
};
|
||||
|
||||
inline string EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) {
|
||||
string str;
|
||||
RunePtrWrapper it_begin(begin), it_end(end);
|
||||
limonp::Unicode32ToUtf8(it_begin, it_end, str);
|
||||
return str;
|
||||
}
|
||||
|
||||
inline void EncodeRunesToString(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, string& str) {
|
||||
RunePtrWrapper it_begin(begin), it_end(end);
|
||||
limonp::Unicode32ToUtf8(it_begin, it_end, str);
|
||||
return;
|
||||
}
|
||||
|
||||
class Unicode32Counter {
|
||||
public :
|
||||
size_t length = 0;
|
||||
void clear() {
|
||||
length = 0;
|
||||
}
|
||||
void push_back(uint32_t) {
|
||||
++length;
|
||||
}
|
||||
};
|
||||
|
||||
inline size_t Utf8CharNum(const char * str, size_t length) {
|
||||
Unicode32Counter c;
|
||||
|
||||
if (limonp::Utf8ToUnicode32(str, length, c)) {
|
||||
return c.length;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline size_t Utf8CharNum(const string & str) {
|
||||
return Utf8CharNum(str.data(), str.size());
|
||||
}
|
||||
|
||||
inline bool IsSingleWord(const string& str) {
|
||||
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
|
||||
return rp.len == str.size();
|
||||
}
|
||||
|
||||
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
|
||||
return DecodeRunesInString(s.c_str(), s.size(), unicode);
|
||||
}
|
||||
|
||||
inline Unicode DecodeRunesInString(const string& s) {
|
||||
Unicode result;
|
||||
DecodeRunesInString(s, result);
|
||||
return result;
|
||||
return Utf8CharNum(str) == 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -218,28 +242,31 @@ inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left,
|
|||
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||
assert(right->offset >= left->offset);
|
||||
uint32_t len = right->offset - left->offset + right->len;
|
||||
return s.substr(left->offset, len);
|
||||
uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
|
||||
return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length).word;
|
||||
}
|
||||
|
||||
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
|
||||
for(size_t i = 0; i < wrs.size(); i++) {
|
||||
for (size_t i = 0; i < wrs.size(); i++) {
|
||||
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
|
||||
}
|
||||
}
|
||||
|
||||
inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
|
||||
vector<Word> result;
|
||||
GetWordsFromWordRanges(s, wrs, result);
|
||||
return result;
|
||||
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
|
||||
for (size_t i = 0; i < wrs.size(); i++) {
|
||||
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
|
||||
}
|
||||
}
|
||||
|
||||
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
|
||||
strs.resize(words.size());
|
||||
for(size_t i = 0; i < words.size(); ++i) {
|
||||
|
||||
for (size_t i = 0; i < words.size(); ++i) {
|
||||
strs[i] = words[i].word;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t MAX_WORD_LENGTH = 512;
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEBA_UNICODE_H
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
/************************************
|
||||
* file enc : ascii
|
||||
* author : wuyanyi09@gmail.com
|
||||
|
@ -33,54 +15,54 @@ namespace limonp {
|
|||
using namespace std;
|
||||
|
||||
class ArgvContext {
|
||||
public :
|
||||
ArgvContext(int argc, const char* const * argv) {
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if(StartsWith(argv[i], "-")) {
|
||||
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
|
||||
mpss_[argv[i]] = argv[i + 1];
|
||||
i++;
|
||||
} else {
|
||||
sset_.insert(argv[i]);
|
||||
}
|
||||
} else {
|
||||
args_.push_back(argv[i]);
|
||||
}
|
||||
public :
|
||||
ArgvContext(int argc, const char* const * argv) {
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if(StartsWith(argv[i], "-")) {
|
||||
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
|
||||
mpss_[argv[i]] = argv[i+1];
|
||||
i++;
|
||||
} else {
|
||||
sset_.insert(argv[i]);
|
||||
}
|
||||
} else {
|
||||
args_.push_back(argv[i]);
|
||||
}
|
||||
}
|
||||
~ArgvContext() {
|
||||
}
|
||||
}
|
||||
~ArgvContext() {
|
||||
}
|
||||
|
||||
friend ostream& operator << (ostream& os, const ArgvContext& args);
|
||||
string operator [](size_t i) const {
|
||||
if(i < args_.size()) {
|
||||
return args_[i];
|
||||
}
|
||||
return "";
|
||||
friend ostream& operator << (ostream& os, const ArgvContext& args);
|
||||
string operator [](size_t i) const {
|
||||
if(i < args_.size()) {
|
||||
return args_[i];
|
||||
}
|
||||
string operator [](const string& key) const {
|
||||
map<string, string>::const_iterator it = mpss_.find(key);
|
||||
if(it != mpss_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return "";
|
||||
return "";
|
||||
}
|
||||
string operator [](const string& key) const {
|
||||
map<string, string>::const_iterator it = mpss_.find(key);
|
||||
if(it != mpss_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
bool HasKey(const string& key) const {
|
||||
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
bool HasKey(const string& key) const {
|
||||
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
vector<string> args_;
|
||||
map<string, string> mpss_;
|
||||
set<string> sset_;
|
||||
private:
|
||||
vector<string> args_;
|
||||
map<string, string> mpss_;
|
||||
set<string> sset_;
|
||||
}; // class ArgvContext
|
||||
|
||||
inline ostream& operator << (ostream& os, const ArgvContext& args) {
|
||||
return os << args.args_ << args.mpss_ << args.sset_;
|
||||
return os<<args.args_<<args.mpss_<<args.sset_;
|
||||
}
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_BLOCKINGQUEUE_HPP
|
||||
#define LIMONP_BLOCKINGQUEUE_HPP
|
||||
|
||||
|
@ -25,41 +7,41 @@
|
|||
namespace limonp {
|
||||
template<class T>
|
||||
class BlockingQueue: NonCopyable {
|
||||
public:
|
||||
BlockingQueue()
|
||||
: mutex_(), notEmpty_(mutex_), queue_() {
|
||||
}
|
||||
public:
|
||||
BlockingQueue()
|
||||
: mutex_(), notEmpty_(mutex_), queue_() {
|
||||
}
|
||||
|
||||
void Push(const T& x) {
|
||||
MutexLockGuard lock(mutex_);
|
||||
queue_.push(x);
|
||||
notEmpty_.Notify(); // Wait morphing saves us
|
||||
}
|
||||
void Push(const T& x) {
|
||||
MutexLockGuard lock(mutex_);
|
||||
queue_.push(x);
|
||||
notEmpty_.Notify(); // Wait morphing saves us
|
||||
}
|
||||
|
||||
T Pop() {
|
||||
MutexLockGuard lock(mutex_);
|
||||
// always use a while-loop, due to spurious wakeup
|
||||
while(queue_.empty()) {
|
||||
notEmpty_.Wait();
|
||||
}
|
||||
assert(!queue_.empty());
|
||||
T front(queue_.front());
|
||||
queue_.pop();
|
||||
return front;
|
||||
T Pop() {
|
||||
MutexLockGuard lock(mutex_);
|
||||
// always use a while-loop, due to spurious wakeup
|
||||
while (queue_.empty()) {
|
||||
notEmpty_.Wait();
|
||||
}
|
||||
assert(!queue_.empty());
|
||||
T front(queue_.front());
|
||||
queue_.pop();
|
||||
return front;
|
||||
}
|
||||
|
||||
size_t Size() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.size();
|
||||
}
|
||||
bool Empty() const {
|
||||
return Size() == 0;
|
||||
}
|
||||
size_t Size() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.size();
|
||||
}
|
||||
bool Empty() const {
|
||||
return Size() == 0;
|
||||
}
|
||||
|
||||
private:
|
||||
mutable MutexLock mutex_;
|
||||
Condition notEmpty_;
|
||||
std::queue<T> queue_;
|
||||
private:
|
||||
mutable MutexLock mutex_;
|
||||
Condition notEmpty_;
|
||||
std::queue<T> queue_;
|
||||
}; // class BlockingQueue
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
||||
#define LIMONP_BOUNDED_BLOCKING_QUEUE_HPP
|
||||
|
||||
|
@ -25,59 +7,59 @@ namespace limonp {
|
|||
|
||||
template<typename T>
|
||||
class BoundedBlockingQueue : NonCopyable {
|
||||
public:
|
||||
explicit BoundedBlockingQueue(size_t maxSize)
|
||||
: mutex_(),
|
||||
notEmpty_(mutex_),
|
||||
notFull_(mutex_),
|
||||
queue_(maxSize) {
|
||||
}
|
||||
public:
|
||||
explicit BoundedBlockingQueue(size_t maxSize)
|
||||
: mutex_(),
|
||||
notEmpty_(mutex_),
|
||||
notFull_(mutex_),
|
||||
queue_(maxSize) {
|
||||
}
|
||||
|
||||
void Push(const T& x) {
|
||||
MutexLockGuard lock(mutex_);
|
||||
while(queue_.Full()) {
|
||||
notFull_.Wait();
|
||||
}
|
||||
assert(!queue_.Full());
|
||||
queue_.Push(x);
|
||||
notEmpty_.Notify();
|
||||
void Push(const T& x) {
|
||||
MutexLockGuard lock(mutex_);
|
||||
while (queue_.Full()) {
|
||||
notFull_.Wait();
|
||||
}
|
||||
assert(!queue_.Full());
|
||||
queue_.Push(x);
|
||||
notEmpty_.Notify();
|
||||
}
|
||||
|
||||
T Pop() {
|
||||
MutexLockGuard lock(mutex_);
|
||||
while(queue_.Empty()) {
|
||||
notEmpty_.Wait();
|
||||
}
|
||||
assert(!queue_.Empty());
|
||||
T res = queue_.Pop();
|
||||
notFull_.Notify();
|
||||
return res;
|
||||
T Pop() {
|
||||
MutexLockGuard lock(mutex_);
|
||||
while (queue_.Empty()) {
|
||||
notEmpty_.Wait();
|
||||
}
|
||||
assert(!queue_.Empty());
|
||||
T res = queue_.Pop();
|
||||
notFull_.Notify();
|
||||
return res;
|
||||
}
|
||||
|
||||
bool Empty() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.Empty();
|
||||
}
|
||||
bool Empty() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.Empty();
|
||||
}
|
||||
|
||||
bool Full() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.Full();
|
||||
}
|
||||
bool Full() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.Full();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.size();
|
||||
}
|
||||
size_t size() const {
|
||||
MutexLockGuard lock(mutex_);
|
||||
return queue_.size();
|
||||
}
|
||||
|
||||
size_t capacity() const {
|
||||
return queue_.capacity();
|
||||
}
|
||||
size_t capacity() const {
|
||||
return queue_.capacity();
|
||||
}
|
||||
|
||||
private:
|
||||
mutable MutexLock mutex_;
|
||||
Condition notEmpty_;
|
||||
Condition notFull_;
|
||||
BoundedQueue<T> queue_;
|
||||
private:
|
||||
mutable MutexLock mutex_;
|
||||
Condition notEmpty_;
|
||||
Condition notFull_;
|
||||
BoundedQueue<T> queue_;
|
||||
}; // class BoundedBlockingQueue
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_BOUNDED_QUEUE_HPP
|
||||
#define LIMONP_BOUNDED_QUEUE_HPP
|
||||
|
||||
|
@ -27,55 +9,55 @@ namespace limonp {
|
|||
using namespace std;
|
||||
template<class T>
|
||||
class BoundedQueue {
|
||||
public:
|
||||
explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
|
||||
head_ = 0;
|
||||
tail_ = 0;
|
||||
size_ = 0;
|
||||
assert(capacity_);
|
||||
}
|
||||
~BoundedQueue() {
|
||||
}
|
||||
public:
|
||||
explicit BoundedQueue(size_t capacity): capacity_(capacity), circular_buffer_(capacity) {
|
||||
head_ = 0;
|
||||
tail_ = 0;
|
||||
size_ = 0;
|
||||
assert(capacity_);
|
||||
}
|
||||
~BoundedQueue() {
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
head_ = 0;
|
||||
tail_ = 0;
|
||||
size_ = 0;
|
||||
}
|
||||
bool Empty() const {
|
||||
return !size_;
|
||||
}
|
||||
bool Full() const {
|
||||
return capacity_ == size_;
|
||||
}
|
||||
size_t Size() const {
|
||||
return size_;
|
||||
}
|
||||
size_t Capacity() const {
|
||||
return capacity_;
|
||||
}
|
||||
void Clear() {
|
||||
head_ = 0;
|
||||
tail_ = 0;
|
||||
size_ = 0;
|
||||
}
|
||||
bool Empty() const {
|
||||
return !size_;
|
||||
}
|
||||
bool Full() const {
|
||||
return capacity_ == size_;
|
||||
}
|
||||
size_t Size() const {
|
||||
return size_;
|
||||
}
|
||||
size_t Capacity() const {
|
||||
return capacity_;
|
||||
}
|
||||
|
||||
void Push(const T& t) {
|
||||
assert(!Full());
|
||||
circular_buffer_[tail_] = t;
|
||||
tail_ = (tail_ + 1) % capacity_;
|
||||
size_ ++;
|
||||
}
|
||||
void Push(const T& t) {
|
||||
assert(!Full());
|
||||
circular_buffer_[tail_] = t;
|
||||
tail_ = (tail_ + 1) % capacity_;
|
||||
size_ ++;
|
||||
}
|
||||
|
||||
T Pop() {
|
||||
assert(!Empty());
|
||||
size_t oldPos = head_;
|
||||
head_ = (head_ + 1) % capacity_;
|
||||
size_ --;
|
||||
return circular_buffer_[oldPos];
|
||||
}
|
||||
T Pop() {
|
||||
assert(!Empty());
|
||||
size_t oldPos = head_;
|
||||
head_ = (head_ + 1) % capacity_;
|
||||
size_ --;
|
||||
return circular_buffer_[oldPos];
|
||||
}
|
||||
|
||||
private:
|
||||
size_t head_;
|
||||
size_t tail_;
|
||||
size_t size_;
|
||||
const size_t capacity_;
|
||||
vector<T> circular_buffer_;
|
||||
private:
|
||||
size_t head_;
|
||||
size_t tail_;
|
||||
size_t size_;
|
||||
const size_t capacity_;
|
||||
vector<T> circular_buffer_;
|
||||
|
||||
}; // class BoundedQueue
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,222 +1,204 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_CLOSURE_HPP
|
||||
#define LIMONP_CLOSURE_HPP
|
||||
|
||||
namespace limonp {
|
||||
|
||||
class ClosureInterface {
|
||||
public:
|
||||
virtual ~ClosureInterface() {
|
||||
}
|
||||
virtual void Run() = 0;
|
||||
public:
|
||||
virtual ~ClosureInterface() {
|
||||
}
|
||||
virtual void Run() = 0;
|
||||
};
|
||||
|
||||
template <class Funct>
|
||||
class Closure0: public ClosureInterface {
|
||||
public:
|
||||
Closure0(Funct fun) {
|
||||
fun_ = fun;
|
||||
}
|
||||
virtual ~Closure0() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)();
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
};
|
||||
public:
|
||||
Closure0(Funct fun) {
|
||||
fun_ = fun;
|
||||
}
|
||||
virtual ~Closure0() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)();
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
};
|
||||
|
||||
template <class Funct, class Arg1>
|
||||
class Closure1: public ClosureInterface {
|
||||
public:
|
||||
Closure1(Funct fun, Arg1 arg1) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
}
|
||||
virtual ~Closure1() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
};
|
||||
public:
|
||||
Closure1(Funct fun, Arg1 arg1) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
}
|
||||
virtual ~Closure1() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
};
|
||||
|
||||
template <class Funct, class Arg1, class Arg2>
|
||||
class Closure2: public ClosureInterface {
|
||||
public:
|
||||
Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
}
|
||||
virtual ~Closure2() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_, arg2_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
};
|
||||
public:
|
||||
Closure2(Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
}
|
||||
virtual ~Closure2() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_, arg2_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
};
|
||||
|
||||
template <class Funct, class Arg1, class Arg2, class Arg3>
|
||||
class Closure3: public ClosureInterface {
|
||||
public:
|
||||
Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
arg3_ = arg3;
|
||||
}
|
||||
virtual ~Closure3() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_, arg2_, arg3_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
Arg3 arg3_;
|
||||
};
|
||||
public:
|
||||
Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
arg3_ = arg3;
|
||||
}
|
||||
virtual ~Closure3() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(*fun_)(arg1_, arg2_, arg3_);
|
||||
}
|
||||
private:
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
Arg3 arg3_;
|
||||
};
|
||||
|
||||
template <class Obj, class Funct>
|
||||
template <class Obj, class Funct>
|
||||
class ObjClosure0: public ClosureInterface {
|
||||
public:
|
||||
ObjClosure0(Obj* p, Funct fun) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
}
|
||||
virtual ~ObjClosure0() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)();
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
};
|
||||
public:
|
||||
ObjClosure0(Obj* p, Funct fun) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
}
|
||||
virtual ~ObjClosure0() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)();
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
};
|
||||
|
||||
template <class Obj, class Funct, class Arg1>
|
||||
template <class Obj, class Funct, class Arg1>
|
||||
class ObjClosure1: public ClosureInterface {
|
||||
public:
|
||||
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
}
|
||||
virtual ~ObjClosure1() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
};
|
||||
public:
|
||||
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
}
|
||||
virtual ~ObjClosure1() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
};
|
||||
|
||||
template <class Obj, class Funct, class Arg1, class Arg2>
|
||||
template <class Obj, class Funct, class Arg1, class Arg2>
|
||||
class ObjClosure2: public ClosureInterface {
|
||||
public:
|
||||
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
}
|
||||
virtual ~ObjClosure2() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_, arg2_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
};
|
||||
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
|
||||
public:
|
||||
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
}
|
||||
virtual ~ObjClosure2() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_, arg2_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
};
|
||||
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
|
||||
class ObjClosure3: public ClosureInterface {
|
||||
public:
|
||||
ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
arg3_ = arg3;
|
||||
}
|
||||
virtual ~ObjClosure3() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_, arg2_, arg3_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
Arg3 arg3_;
|
||||
};
|
||||
public:
|
||||
ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
p_ = p;
|
||||
fun_ = fun;
|
||||
arg1_ = arg1;
|
||||
arg2_ = arg2;
|
||||
arg3_ = arg3;
|
||||
}
|
||||
virtual ~ObjClosure3() {
|
||||
}
|
||||
virtual void Run() {
|
||||
(p_->*fun_)(arg1_, arg2_, arg3_);
|
||||
}
|
||||
private:
|
||||
Obj* p_;
|
||||
Funct fun_;
|
||||
Arg1 arg1_;
|
||||
Arg2 arg2_;
|
||||
Arg3 arg3_;
|
||||
};
|
||||
|
||||
template<class R>
|
||||
ClosureInterface* NewClosure(R(*fun)()) {
|
||||
return new Closure0<R(*)()>(fun);
|
||||
ClosureInterface* NewClosure(R (*fun)()) {
|
||||
return new Closure0<R (*)()>(fun);
|
||||
}
|
||||
|
||||
template<class R, class Arg1>
|
||||
ClosureInterface* NewClosure(R(*fun)(Arg1), Arg1 arg1) {
|
||||
return new Closure1<R(*)(Arg1), Arg1>(fun, arg1);
|
||||
ClosureInterface* NewClosure(R (*fun)(Arg1), Arg1 arg1) {
|
||||
return new Closure1<R (*)(Arg1), Arg1>(fun, arg1);
|
||||
}
|
||||
|
||||
template<class R, class Arg1, class Arg2>
|
||||
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||
return new Closure2<R(*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
|
||||
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||
return new Closure2<R (*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
|
||||
}
|
||||
|
||||
template<class R, class Arg1, class Arg2, class Arg3>
|
||||
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
return new Closure3<R(*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
|
||||
ClosureInterface* NewClosure(R (*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
return new Closure3<R (*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
|
||||
}
|
||||
|
||||
template<class R, class Obj>
|
||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)()) {
|
||||
return new ObjClosure0<Obj, R(Obj::*)()>(obj, fun);
|
||||
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)()) {
|
||||
return new ObjClosure0<Obj, R (Obj::* )()>(obj, fun);
|
||||
}
|
||||
|
||||
template<class R, class Obj, class Arg1>
|
||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1), Arg1 arg1) {
|
||||
return new ObjClosure1<Obj, R(Obj::*)(Arg1), Arg1>(obj, fun, arg1);
|
||||
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1), Arg1 arg1) {
|
||||
return new ObjClosure1<Obj, R (Obj::* )(Arg1), Arg1>(obj, fun, arg1);
|
||||
}
|
||||
|
||||
template<class R, class Obj, class Arg1, class Arg2>
|
||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||
return new ObjClosure2<Obj, R(Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
|
||||
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
|
||||
return new ObjClosure2<Obj, R (Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
|
||||
}
|
||||
|
||||
template<class R, class Obj, class Arg1, class Arg2, class Arg3>
|
||||
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
return new ObjClosure3<Obj, R(Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
|
||||
ClosureInterface* NewClosure(Obj* obj, R (Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
|
||||
return new ObjClosure3<Obj, R (Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
|
||||
}
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_COLOR_PRINT_HPP
|
||||
#define LIMONP_COLOR_PRINT_HPP
|
||||
|
||||
|
@ -27,21 +9,21 @@ namespace limonp {
|
|||
using std::string;
|
||||
|
||||
enum Color {
|
||||
BLACK = 30,
|
||||
RED,
|
||||
GREEN,
|
||||
YELLOW,
|
||||
BLUE,
|
||||
PURPLE
|
||||
BLACK = 30,
|
||||
RED,
|
||||
GREEN,
|
||||
YELLOW,
|
||||
BLUE,
|
||||
PURPLE
|
||||
}; // enum Color
|
||||
|
||||
static void ColorPrintln(enum Color color, const char * fmt, ...) {
|
||||
va_list ap;
|
||||
printf("\033[0;%dm", color);
|
||||
va_start(ap, fmt);
|
||||
vprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
|
||||
va_list ap;
|
||||
printf("\033[0;%dm", color);
|
||||
va_start(ap, fmt);
|
||||
vprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
|
||||
}
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_CONDITION_HPP
|
||||
#define LIMONP_CONDITION_HPP
|
||||
|
||||
|
@ -24,31 +6,31 @@
|
|||
namespace limonp {
|
||||
|
||||
class Condition : NonCopyable {
|
||||
public:
|
||||
explicit Condition(MutexLock& mutex)
|
||||
: mutex_(mutex) {
|
||||
XCHECK(!pthread_cond_init(&pcond_, NULL));
|
||||
}
|
||||
public:
|
||||
explicit Condition(MutexLock& mutex)
|
||||
: mutex_(mutex) {
|
||||
XCHECK(!pthread_cond_init(&pcond_, NULL));
|
||||
}
|
||||
|
||||
~Condition() {
|
||||
XCHECK(!pthread_cond_destroy(&pcond_));
|
||||
}
|
||||
~Condition() {
|
||||
XCHECK(!pthread_cond_destroy(&pcond_));
|
||||
}
|
||||
|
||||
void Wait() {
|
||||
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
|
||||
}
|
||||
void Wait() {
|
||||
XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
|
||||
}
|
||||
|
||||
void Notify() {
|
||||
XCHECK(!pthread_cond_signal(&pcond_));
|
||||
}
|
||||
void Notify() {
|
||||
XCHECK(!pthread_cond_signal(&pcond_));
|
||||
}
|
||||
|
||||
void NotifyAll() {
|
||||
XCHECK(!pthread_cond_broadcast(&pcond_));
|
||||
}
|
||||
void NotifyAll() {
|
||||
XCHECK(!pthread_cond_broadcast(&pcond_));
|
||||
}
|
||||
|
||||
private:
|
||||
MutexLock& mutex_;
|
||||
pthread_cond_t pcond_;
|
||||
private:
|
||||
MutexLock& mutex_;
|
||||
pthread_cond_t pcond_;
|
||||
}; // class Condition
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
/************************************
|
||||
* file enc : utf8
|
||||
* author : wuyanyi09@gmail.com
|
||||
|
@ -34,86 +16,86 @@ namespace limonp {
|
|||
using namespace std;
|
||||
|
||||
class Config {
|
||||
public:
|
||||
explicit Config(const string& filePath) {
|
||||
LoadFile(filePath);
|
||||
}
|
||||
public:
|
||||
explicit Config(const string& filePath) {
|
||||
LoadFile(filePath);
|
||||
}
|
||||
|
||||
operator bool () {
|
||||
return !map_.empty();
|
||||
}
|
||||
operator bool () {
|
||||
return !map_.empty();
|
||||
}
|
||||
|
||||
string Get(const string& key, const string& defaultvalue) const {
|
||||
map<string, string>::const_iterator it = map_.find(key);
|
||||
if(map_.end() != it) {
|
||||
return it->second;
|
||||
}
|
||||
return defaultvalue;
|
||||
string Get(const string& key, const string& defaultvalue) const {
|
||||
map<string, string>::const_iterator it = map_.find(key);
|
||||
if(map_.end() != it) {
|
||||
return it->second;
|
||||
}
|
||||
int Get(const string& key, int defaultvalue) const {
|
||||
string str = Get(key, "");
|
||||
if("" == str) {
|
||||
return defaultvalue;
|
||||
}
|
||||
return atoi(str.c_str());
|
||||
return defaultvalue;
|
||||
}
|
||||
int Get(const string& key, int defaultvalue) const {
|
||||
string str = Get(key, "");
|
||||
if("" == str) {
|
||||
return defaultvalue;
|
||||
}
|
||||
const char* operator [](const char* key) const {
|
||||
if(NULL == key) {
|
||||
return NULL;
|
||||
}
|
||||
map<string, string>::const_iterator it = map_.find(key);
|
||||
if(map_.end() != it) {
|
||||
return it->second.c_str();
|
||||
}
|
||||
return NULL;
|
||||
return atoi(str.c_str());
|
||||
}
|
||||
const char* operator [] (const char* key) const {
|
||||
if(NULL == key) {
|
||||
return NULL;
|
||||
}
|
||||
map<string, string>::const_iterator it = map_.find(key);
|
||||
if(map_.end() != it) {
|
||||
return it->second.c_str();
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
string GetConfigInfo() const {
|
||||
string res;
|
||||
res << *this;
|
||||
return res;
|
||||
string GetConfigInfo() const {
|
||||
string res;
|
||||
res << *this;
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
void LoadFile(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
assert(ifs);
|
||||
string line;
|
||||
vector<string> vecBuf;
|
||||
size_t lineno = 0;
|
||||
while(getline(ifs, line)) {
|
||||
lineno ++;
|
||||
Trim(line);
|
||||
if(line.empty() || StartsWith(line, "#")) {
|
||||
continue;
|
||||
}
|
||||
vecBuf.clear();
|
||||
Split(line, vecBuf, "=");
|
||||
if(2 != vecBuf.size()) {
|
||||
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
|
||||
assert(false);
|
||||
continue;
|
||||
}
|
||||
string& key = vecBuf[0];
|
||||
string& value = vecBuf[1];
|
||||
Trim(key);
|
||||
Trim(value);
|
||||
if(!map_.insert(make_pair(key, value)).second) {
|
||||
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
|
||||
assert(false);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
ifs.close();
|
||||
}
|
||||
|
||||
private:
|
||||
void LoadFile(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
assert(ifs);
|
||||
string line;
|
||||
vector<string> vecBuf;
|
||||
size_t lineno = 0;
|
||||
while(getline(ifs, line)) {
|
||||
lineno ++;
|
||||
Trim(line);
|
||||
if(line.empty() || StartsWith(line, "#")) {
|
||||
continue;
|
||||
}
|
||||
vecBuf.clear();
|
||||
Split(line, vecBuf, "=");
|
||||
if(2 != vecBuf.size()) {
|
||||
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
|
||||
assert(false);
|
||||
continue;
|
||||
}
|
||||
string& key = vecBuf[0];
|
||||
string& value = vecBuf[1];
|
||||
Trim(key);
|
||||
Trim(value);
|
||||
if(!map_.insert(make_pair(key, value)).second) {
|
||||
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
|
||||
assert(false);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
ifs.close();
|
||||
}
|
||||
friend ostream& operator << (ostream& os, const Config& config);
|
||||
|
||||
friend ostream& operator << (ostream& os, const Config& config);
|
||||
|
||||
map<string, string> map_;
|
||||
map<string, string> map_;
|
||||
}; // class Config
|
||||
|
||||
inline ostream& operator << (ostream& os, const Config& config) {
|
||||
return os << config.map_;
|
||||
return os << config.map_;
|
||||
}
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_FILELOCK_HPP
|
||||
#define LIMONP_FILELOCK_HPP
|
||||
|
||||
|
@ -33,58 +15,58 @@ namespace limonp {
|
|||
using std::string;
|
||||
|
||||
class FileLock {
|
||||
public:
|
||||
FileLock() : fd_(-1), ok_(true) {
|
||||
public:
|
||||
FileLock() : fd_(-1), ok_(true) {
|
||||
}
|
||||
~FileLock() {
|
||||
if(fd_ > 0) {
|
||||
Close();
|
||||
}
|
||||
~FileLock() {
|
||||
if(fd_ > 0) {
|
||||
Close();
|
||||
}
|
||||
}
|
||||
void Open(const string& fname) {
|
||||
assert(fd_ == -1);
|
||||
fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
||||
if(fd_ < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
void Open(const string& fname) {
|
||||
assert(fd_ == -1);
|
||||
fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
||||
if(fd_ < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
}
|
||||
void Close() {
|
||||
::close(fd_);
|
||||
}
|
||||
void Lock() {
|
||||
if(LockOrUnlock(fd_, true) < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
void Close() {
|
||||
::close(fd_);
|
||||
}
|
||||
void Lock() {
|
||||
if(LockOrUnlock(fd_, true) < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
}
|
||||
void UnLock() {
|
||||
if(LockOrUnlock(fd_, false) < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
}
|
||||
bool Ok() const {
|
||||
return ok_;
|
||||
}
|
||||
string Error() const {
|
||||
return err_;
|
||||
}
|
||||
private:
|
||||
static int LockOrUnlock(int fd, bool lock) {
|
||||
errno = 0;
|
||||
struct flock f;
|
||||
memset(&f, 0, sizeof(f));
|
||||
f.l_type = (lock ? F_WRLCK : F_UNLCK);
|
||||
f.l_whence = SEEK_SET;
|
||||
f.l_start = 0;
|
||||
f.l_len = 0; // Lock/unlock entire file
|
||||
return fcntl(fd, F_SETLK, &f);
|
||||
}
|
||||
void UnLock() {
|
||||
if(LockOrUnlock(fd_, false) < 0) {
|
||||
ok_ = false;
|
||||
err_ = strerror(errno);
|
||||
}
|
||||
}
|
||||
bool Ok() const {
|
||||
return ok_;
|
||||
}
|
||||
string Error() const {
|
||||
return err_;
|
||||
}
|
||||
private:
|
||||
static int LockOrUnlock(int fd, bool lock) {
|
||||
errno = 0;
|
||||
struct flock f;
|
||||
memset(&f, 0, sizeof(f));
|
||||
f.l_type = (lock ? F_WRLCK : F_UNLCK);
|
||||
f.l_whence = SEEK_SET;
|
||||
f.l_start = 0;
|
||||
f.l_len = 0; // Lock/unlock entire file
|
||||
return fcntl(fd, F_SETLK, &f);
|
||||
}
|
||||
|
||||
int fd_;
|
||||
bool ok_;
|
||||
string err_;
|
||||
int fd_;
|
||||
bool ok_;
|
||||
string err_;
|
||||
}; // class FileLock
|
||||
|
||||
}// namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_FORCE_PUBLIC_H
|
||||
#define LIMONP_FORCE_PUBLIC_H
|
||||
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_LOCAL_VECTOR_HPP
|
||||
#define LIMONP_LOCAL_VECTOR_HPP
|
||||
|
||||
|
@ -33,123 +15,126 @@ using namespace std;
|
|||
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;
|
||||
template <class T>
|
||||
class LocalVector {
|
||||
public:
|
||||
typedef const T* const_iterator ;
|
||||
typedef T value_type;
|
||||
typedef size_t size_type;
|
||||
private:
|
||||
T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
|
||||
T * ptr_;
|
||||
size_t size_;
|
||||
size_t capacity_;
|
||||
public:
|
||||
LocalVector() {
|
||||
init_();
|
||||
};
|
||||
LocalVector(const LocalVector<T>& vec) {
|
||||
init_();
|
||||
*this = vec;
|
||||
public:
|
||||
typedef const T* const_iterator ;
|
||||
typedef T value_type;
|
||||
typedef size_t size_type;
|
||||
private:
|
||||
T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
|
||||
T * ptr_;
|
||||
size_t size_;
|
||||
size_t capacity_;
|
||||
public:
|
||||
LocalVector() {
|
||||
init_();
|
||||
};
|
||||
LocalVector(const LocalVector<T>& vec) {
|
||||
init_();
|
||||
*this = vec;
|
||||
}
|
||||
LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
|
||||
init_();
|
||||
while(begin != end) {
|
||||
push_back(*begin++);
|
||||
}
|
||||
LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
|
||||
init_();
|
||||
while(begin != end) {
|
||||
push_back(*begin++);
|
||||
}
|
||||
}
|
||||
LocalVector(size_t size, const T& t) { // TODO: make it faster
|
||||
init_();
|
||||
while(size--) {
|
||||
push_back(t);
|
||||
}
|
||||
LocalVector(size_t size, const T& t) { // TODO: make it faster
|
||||
init_();
|
||||
while(size--) {
|
||||
push_back(t);
|
||||
}
|
||||
}
|
||||
~LocalVector() {
|
||||
if(ptr_ != buffer_) {
|
||||
free(ptr_);
|
||||
}
|
||||
~LocalVector() {
|
||||
if(ptr_ != buffer_) {
|
||||
free(ptr_);
|
||||
}
|
||||
};
|
||||
public:
|
||||
LocalVector<T>& operator = (const LocalVector<T>& vec) {
|
||||
clear();
|
||||
size_ = vec.size();
|
||||
capacity_ = vec.capacity();
|
||||
if(vec.buffer_ == vec.ptr_) {
|
||||
memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
|
||||
ptr_ = buffer_;
|
||||
} else {
|
||||
ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
|
||||
assert(ptr_);
|
||||
memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
|
||||
}
|
||||
return *this;
|
||||
};
|
||||
public:
|
||||
LocalVector<T>& operator = (const LocalVector<T>& vec) {
|
||||
if(this == &vec){
|
||||
return *this;
|
||||
}
|
||||
clear();
|
||||
size_ = vec.size();
|
||||
capacity_ = vec.capacity();
|
||||
if(vec.buffer_ == vec.ptr_) {
|
||||
memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
|
||||
ptr_ = buffer_;
|
||||
} else {
|
||||
ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
|
||||
assert(ptr_);
|
||||
memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
|
||||
}
|
||||
private:
|
||||
void init_() {
|
||||
ptr_ = buffer_;
|
||||
size_ = 0;
|
||||
capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
|
||||
return *this;
|
||||
}
|
||||
private:
|
||||
void init_() {
|
||||
ptr_ = buffer_;
|
||||
size_ = 0;
|
||||
capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
|
||||
}
|
||||
public:
|
||||
T& operator [] (size_t i) {
|
||||
return ptr_[i];
|
||||
}
|
||||
const T& operator [] (size_t i) const {
|
||||
return ptr_[i];
|
||||
}
|
||||
void push_back(const T& t) {
|
||||
if(size_ == capacity_) {
|
||||
assert(capacity_);
|
||||
reserve(capacity_ * 2);
|
||||
}
|
||||
public:
|
||||
T& operator [](size_t i) {
|
||||
return ptr_[i];
|
||||
ptr_[size_ ++ ] = t;
|
||||
}
|
||||
void reserve(size_t size) {
|
||||
if(size <= capacity_) {
|
||||
return;
|
||||
}
|
||||
const T& operator [](size_t i) const {
|
||||
return ptr_[i];
|
||||
T * next = (T*)malloc(sizeof(T) * size);
|
||||
assert(next);
|
||||
T * old = ptr_;
|
||||
ptr_ = next;
|
||||
memcpy(ptr_, old, sizeof(T) * capacity_);
|
||||
capacity_ = size;
|
||||
if(old != buffer_) {
|
||||
free(old);
|
||||
}
|
||||
void push_back(const T& t) {
|
||||
if(size_ == capacity_) {
|
||||
assert(capacity_);
|
||||
reserve(capacity_ * 2);
|
||||
}
|
||||
ptr_[size_ ++ ] = t;
|
||||
}
|
||||
void reserve(size_t size) {
|
||||
if(size <= capacity_) {
|
||||
return;
|
||||
}
|
||||
T * next = (T*)malloc(sizeof(T) * size);
|
||||
assert(next);
|
||||
T * old = ptr_;
|
||||
ptr_ = next;
|
||||
memcpy(ptr_, old, sizeof(T) * capacity_);
|
||||
capacity_ = size;
|
||||
if(old != buffer_) {
|
||||
free(old);
|
||||
}
|
||||
}
|
||||
bool empty() const {
|
||||
return 0 == size();
|
||||
}
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
size_t capacity() const {
|
||||
return capacity_;
|
||||
}
|
||||
const_iterator begin() const {
|
||||
return ptr_;
|
||||
}
|
||||
const_iterator end() const {
|
||||
return ptr_ + size_;
|
||||
}
|
||||
void clear() {
|
||||
if(ptr_ != buffer_) {
|
||||
free(ptr_);
|
||||
}
|
||||
init_();
|
||||
}
|
||||
bool empty() const {
|
||||
return 0 == size();
|
||||
}
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
size_t capacity() const {
|
||||
return capacity_;
|
||||
}
|
||||
const_iterator begin() const {
|
||||
return ptr_;
|
||||
}
|
||||
const_iterator end() const {
|
||||
return ptr_ + size_;
|
||||
}
|
||||
void clear() {
|
||||
if(ptr_ != buffer_) {
|
||||
free(ptr_);
|
||||
}
|
||||
init_();
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
ostream & operator << (ostream& os, const LocalVector<T>& vec) {
|
||||
if(vec.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os << "[\"" << vec[0];
|
||||
for(size_t i = 1; i < vec.size(); i++) {
|
||||
os << "\", \"" << vec[i];
|
||||
}
|
||||
os << "\"]";
|
||||
return os;
|
||||
if(vec.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os<<"[\""<<vec[0];
|
||||
for(size_t i = 1; i < vec.size(); i++) {
|
||||
os<<"\", \""<<vec[i];
|
||||
}
|
||||
os<<"\"]";
|
||||
return os;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_LOGGING_HPP
|
||||
#define LIMONP_LOGGING_HPP
|
||||
|
||||
|
@ -38,55 +20,56 @@
|
|||
namespace limonp {
|
||||
|
||||
enum {
|
||||
LL_DEBUG = 0,
|
||||
LL_INFO = 1,
|
||||
LL_WARNING = 2,
|
||||
LL_ERROR = 3,
|
||||
LL_FATAL = 4,
|
||||
LL_DEBUG = 0,
|
||||
LL_INFO = 1,
|
||||
LL_WARNING = 2,
|
||||
LL_ERROR = 3,
|
||||
LL_FATAL = 4,
|
||||
}; // enum
|
||||
|
||||
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"};
|
||||
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
|
||||
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
|
||||
|
||||
class Logger {
|
||||
public:
|
||||
Logger(size_t level, const char* filename, int lineno)
|
||||
: level_(level) {
|
||||
public:
|
||||
Logger(size_t level, const char* filename, int lineno)
|
||||
: level_(level) {
|
||||
#ifdef LOGGING_LEVEL
|
||||
if(level_ < LOGGING_LEVEL) {
|
||||
return;
|
||||
}
|
||||
if (level_ < LOGGING_LEVEL) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
assert(level_ <= sizeof(LOG_LEVEL_ARRAY) / sizeof(*LOG_LEVEL_ARRAY));
|
||||
char buf[32];
|
||||
time_t now;
|
||||
time(&now);
|
||||
strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now));
|
||||
stream_ << buf
|
||||
<< " " << filename
|
||||
<< ":" << lineno
|
||||
<< " " << LOG_LEVEL_ARRAY[level_]
|
||||
<< " ";
|
||||
}
|
||||
~Logger() {
|
||||
assert(level_ <= sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
|
||||
char buf[32];
|
||||
time_t now;
|
||||
time(&now);
|
||||
struct tm result;
|
||||
localtime_r(&now, &result);
|
||||
strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &result);
|
||||
stream_ << buf
|
||||
<< " " << filename
|
||||
<< ":" << lineno
|
||||
<< " " << LOG_LEVEL_ARRAY[level_]
|
||||
<< " ";
|
||||
}
|
||||
~Logger() {
|
||||
#ifdef LOGGING_LEVEL
|
||||
if(level_ < LOGGING_LEVEL) {
|
||||
return;
|
||||
}
|
||||
if (level_ < LOGGING_LEVEL) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
std::cerr << stream_.str() << std::endl;
|
||||
if(level_ == LL_FATAL) {
|
||||
abort();
|
||||
}
|
||||
std::cerr << stream_.str() << std::endl;
|
||||
if (level_ == LL_FATAL) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& Stream() {
|
||||
return stream_;
|
||||
}
|
||||
std::ostream& Stream() {
|
||||
return stream_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::ostringstream stream_;
|
||||
size_t level_;
|
||||
private:
|
||||
std::ostringstream stream_;
|
||||
size_t level_;
|
||||
}; // class Logger
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_MUTEX_LOCK_HPP
|
||||
#define LIMONP_MUTEX_LOCK_HPP
|
||||
|
||||
|
@ -26,40 +8,40 @@
|
|||
namespace limonp {
|
||||
|
||||
class MutexLock: NonCopyable {
|
||||
public:
|
||||
MutexLock() {
|
||||
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
||||
}
|
||||
~MutexLock() {
|
||||
XCHECK(!pthread_mutex_destroy(&mutex_));
|
||||
}
|
||||
pthread_mutex_t* GetPthreadMutex() {
|
||||
return &mutex_;
|
||||
}
|
||||
public:
|
||||
MutexLock() {
|
||||
XCHECK(!pthread_mutex_init(&mutex_, NULL));
|
||||
}
|
||||
~MutexLock() {
|
||||
XCHECK(!pthread_mutex_destroy(&mutex_));
|
||||
}
|
||||
pthread_mutex_t* GetPthreadMutex() {
|
||||
return &mutex_;
|
||||
}
|
||||
|
||||
private:
|
||||
void Lock() {
|
||||
XCHECK(!pthread_mutex_lock(&mutex_));
|
||||
}
|
||||
void Unlock() {
|
||||
XCHECK(!pthread_mutex_unlock(&mutex_));
|
||||
}
|
||||
friend class MutexLockGuard;
|
||||
private:
|
||||
void Lock() {
|
||||
XCHECK(!pthread_mutex_lock(&mutex_));
|
||||
}
|
||||
void Unlock() {
|
||||
XCHECK(!pthread_mutex_unlock(&mutex_));
|
||||
}
|
||||
friend class MutexLockGuard;
|
||||
|
||||
pthread_mutex_t mutex_;
|
||||
pthread_mutex_t mutex_;
|
||||
}; // class MutexLock
|
||||
|
||||
class MutexLockGuard: NonCopyable {
|
||||
public:
|
||||
explicit MutexLockGuard(MutexLock & mutex)
|
||||
: mutex_(mutex) {
|
||||
mutex_.Lock();
|
||||
}
|
||||
~MutexLockGuard() {
|
||||
mutex_.Unlock();
|
||||
}
|
||||
private:
|
||||
MutexLock & mutex_;
|
||||
public:
|
||||
explicit MutexLockGuard(MutexLock & mutex)
|
||||
: mutex_(mutex) {
|
||||
mutex_.Lock();
|
||||
}
|
||||
~MutexLockGuard() {
|
||||
mutex_.Unlock();
|
||||
}
|
||||
private:
|
||||
MutexLock & mutex_;
|
||||
}; // class MutexLockGuard
|
||||
|
||||
#define MutexLockGuard(x) XCHECK(false);
|
||||
|
|
|
@ -1,35 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
/************************************
|
||||
************************************/
|
||||
#ifndef LIMONP_NONCOPYABLE_H
|
||||
#define LIMONP_NONCOPYABLE_H
|
||||
|
||||
namespace limonp {
|
||||
|
||||
class NonCopyable {
|
||||
protected:
|
||||
NonCopyable() {
|
||||
}
|
||||
~NonCopyable() {
|
||||
}
|
||||
private:
|
||||
NonCopyable(const NonCopyable&);
|
||||
const NonCopyable& operator=(const NonCopyable&);
|
||||
protected:
|
||||
NonCopyable() {
|
||||
}
|
||||
~NonCopyable() {
|
||||
}
|
||||
private:
|
||||
NonCopyable(const NonCopyable& );
|
||||
const NonCopyable& operator=(const NonCopyable& );
|
||||
}; // class NonCopyable
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_STD_EXTEMSION_HPP
|
||||
#define LIMONP_STD_EXTEMSION_HPP
|
||||
|
||||
|
@ -51,123 +33,123 @@ namespace std {
|
|||
|
||||
template<typename T>
|
||||
ostream& operator << (ostream& os, const vector<T>& v) {
|
||||
if(v.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os << "[" << v[0];
|
||||
for(size_t i = 1; i < v.size(); i++) {
|
||||
os << ", " << v[i];
|
||||
}
|
||||
os << "]";
|
||||
return os;
|
||||
if(v.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os<<"["<<v[0];
|
||||
for(size_t i = 1; i < v.size(); i++) {
|
||||
os<<", "<<v[i];
|
||||
}
|
||||
os<<"]";
|
||||
return os;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline ostream& operator << (ostream& os, const vector<string>& v) {
|
||||
if(v.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os << "[\"" << v[0];
|
||||
for(size_t i = 1; i < v.size(); i++) {
|
||||
os << "\", \"" << v[i];
|
||||
}
|
||||
os << "\"]";
|
||||
return os;
|
||||
if(v.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os<<"[\""<<v[0];
|
||||
for(size_t i = 1; i < v.size(); i++) {
|
||||
os<<"\", \""<<v[i];
|
||||
}
|
||||
os<<"\"]";
|
||||
return os;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ostream& operator << (ostream& os, const deque<T>& dq) {
|
||||
if(dq.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os << "[\"" << dq[0];
|
||||
for(size_t i = 1; i < dq.size(); i++) {
|
||||
os << "\", \"" << dq[i];
|
||||
}
|
||||
os << "\"]";
|
||||
return os;
|
||||
if(dq.empty()) {
|
||||
return os << "[]";
|
||||
}
|
||||
os<<"[\""<<dq[0];
|
||||
for(size_t i = 1; i < dq.size(); i++) {
|
||||
os<<"\", \""<<dq[i];
|
||||
}
|
||||
os<<"\"]";
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
template<class T1, class T2>
|
||||
ostream& operator << (ostream& os, const pair<T1, T2>& pr) {
|
||||
os << pr.first << ":" << pr.second ;
|
||||
return os;
|
||||
os << pr.first << ":" << pr.second ;
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
string& operator << (string& str, const T& obj) {
|
||||
stringstream ss;
|
||||
ss << obj; // call ostream& operator << (ostream& os,
|
||||
return str = ss.str();
|
||||
stringstream ss;
|
||||
ss << obj; // call ostream& operator << (ostream& os,
|
||||
return str = ss.str();
|
||||
}
|
||||
|
||||
template<class T1, class T2>
|
||||
ostream& operator << (ostream& os, const map<T1, T2>& mp) {
|
||||
if(mp.empty()) {
|
||||
os << "{}";
|
||||
return os;
|
||||
}
|
||||
os << '{';
|
||||
typename map<T1, T2>::const_iterator it = mp.begin();
|
||||
os << *it;
|
||||
it++;
|
||||
while(it != mp.end()) {
|
||||
os << ", " << *it;
|
||||
it++;
|
||||
}
|
||||
os << '}';
|
||||
if(mp.empty()) {
|
||||
os<<"{}";
|
||||
return os;
|
||||
}
|
||||
os<<'{';
|
||||
typename map<T1, T2>::const_iterator it = mp.begin();
|
||||
os<<*it;
|
||||
it++;
|
||||
while(it != mp.end()) {
|
||||
os<<", "<<*it;
|
||||
it++;
|
||||
}
|
||||
os<<'}';
|
||||
return os;
|
||||
}
|
||||
template<class T1, class T2>
|
||||
ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp) {
|
||||
if(mp.empty()) {
|
||||
return os << "{}";
|
||||
}
|
||||
os << '{';
|
||||
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
|
||||
os << *it;
|
||||
it++;
|
||||
while(it != mp.end()) {
|
||||
os << ", " << *it++;
|
||||
}
|
||||
return os << '}';
|
||||
if(mp.empty()) {
|
||||
return os << "{}";
|
||||
}
|
||||
os<<'{';
|
||||
typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
|
||||
os<<*it;
|
||||
it++;
|
||||
while(it != mp.end()) {
|
||||
os<<", "<<*it++;
|
||||
}
|
||||
return os<<'}';
|
||||
}
|
||||
|
||||
template<class T>
|
||||
ostream& operator << (ostream& os, const set<T>& st) {
|
||||
if(st.empty()) {
|
||||
os << "{}";
|
||||
return os;
|
||||
}
|
||||
os << '{';
|
||||
typename set<T>::const_iterator it = st.begin();
|
||||
os << *it;
|
||||
it++;
|
||||
while(it != st.end()) {
|
||||
os << ", " << *it;
|
||||
it++;
|
||||
}
|
||||
os << '}';
|
||||
if(st.empty()) {
|
||||
os << "{}";
|
||||
return os;
|
||||
}
|
||||
os<<'{';
|
||||
typename set<T>::const_iterator it = st.begin();
|
||||
os<<*it;
|
||||
it++;
|
||||
while(it != st.end()) {
|
||||
os<<", "<<*it;
|
||||
it++;
|
||||
}
|
||||
os<<'}';
|
||||
return os;
|
||||
}
|
||||
|
||||
template<class KeyType, class ContainType>
|
||||
bool IsIn(const ContainType& contain, const KeyType& key) {
|
||||
return contain.end() != contain.find(key);
|
||||
return contain.end() != contain.find(key);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
basic_string<T> & operator << (basic_string<T> & s, ifstream & ifs) {
|
||||
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
|
||||
return s.assign((istreambuf_iterator<T>(ifs)), istreambuf_iterator<T>());
|
||||
}
|
||||
|
||||
template<class T>
|
||||
ofstream & operator << (ofstream & ofs, const basic_string<T>& s) {
|
||||
ostreambuf_iterator<T> itr(ofs);
|
||||
copy(s.begin(), s.end(), itr);
|
||||
return ofs;
|
||||
ostreambuf_iterator<T> itr (ofs);
|
||||
copy(s.begin(), s.end(), itr);
|
||||
return ofs;
|
||||
}
|
||||
|
||||
} // namespace std
|
||||
|
|
|
@ -1,27 +1,14 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
/************************************
|
||||
* file enc : ascii
|
||||
* author : wuyanyi09@gmail.com
|
||||
************************************/
|
||||
#ifndef LIMONP_STR_FUNCTS_H
|
||||
#define LIMONP_STR_FUNCTS_H
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <memory.h>
|
||||
#include <sys/types.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
@ -29,14 +16,9 @@
|
|||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <memory.h>
|
||||
#include <functional>
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
#include <sys/types.h>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include "StdExtension.hpp"
|
||||
|
@ -44,339 +26,356 @@
|
|||
namespace limonp {
|
||||
using namespace std;
|
||||
inline string StringFormat(const char* fmt, ...) {
|
||||
int size = 256;
|
||||
std::string str;
|
||||
va_list ap;
|
||||
while(1) {
|
||||
str.resize(size);
|
||||
va_start(ap, fmt);
|
||||
int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
|
||||
va_end(ap);
|
||||
if(n > -1 && n < size) {
|
||||
str.resize(n);
|
||||
return str;
|
||||
}
|
||||
if(n > -1)
|
||||
size = n + 1;
|
||||
else
|
||||
size *= 2;
|
||||
int size = 256;
|
||||
std::string str;
|
||||
va_list ap;
|
||||
while (1) {
|
||||
str.resize(size);
|
||||
va_start(ap, fmt);
|
||||
int n = vsnprintf((char *)str.c_str(), size, fmt, ap);
|
||||
va_end(ap);
|
||||
if (n > -1 && n < size) {
|
||||
str.resize(n);
|
||||
return str;
|
||||
}
|
||||
return str;
|
||||
if (n > -1)
|
||||
size = n + 1;
|
||||
else
|
||||
size *= 2;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void Join(T begin, T end, string& res, const string& connector) {
|
||||
if(begin == end) {
|
||||
return;
|
||||
}
|
||||
stringstream ss;
|
||||
ss << *begin;
|
||||
begin++;
|
||||
while(begin != end) {
|
||||
ss << connector << *begin;
|
||||
begin ++;
|
||||
}
|
||||
res = ss.str();
|
||||
if(begin == end) {
|
||||
return;
|
||||
}
|
||||
stringstream ss;
|
||||
ss<<*begin;
|
||||
begin++;
|
||||
while(begin != end) {
|
||||
ss << connector << *begin;
|
||||
begin ++;
|
||||
}
|
||||
res = ss.str();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
string Join(T begin, T end, const string& connector) {
|
||||
string res;
|
||||
Join(begin, end, res, connector);
|
||||
return res;
|
||||
string res;
|
||||
Join(begin ,end, res, connector);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline string& Upper(string& str) {
|
||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
|
||||
return str;
|
||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper);
|
||||
return str;
|
||||
}
|
||||
|
||||
inline string& Lower(string& str) {
|
||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
|
||||
return str;
|
||||
transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
|
||||
return str;
|
||||
}
|
||||
|
||||
inline bool IsSpace(unsigned c) {
|
||||
// when passing large int as the argument of isspace, it core dump, so here need a type cast.
|
||||
return c > 0xff ? false : std::isspace(c & 0xff) != 0;
|
||||
// when passing large int as the argument of isspace, it core dump, so here need a type cast.
|
||||
return c > 0xff ? false : std::isspace(c & 0xff);
|
||||
}
|
||||
|
||||
inline std::string& LTrim(std::string &s) {
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
|
||||
return s;
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))));
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string& RTrim(std::string &s) {
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
|
||||
return s;
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<unsigned, bool>(IsSpace))).base(), s.end());
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string& Trim(std::string &s) {
|
||||
return LTrim(RTrim(s));
|
||||
return LTrim(RTrim(s));
|
||||
}
|
||||
|
||||
inline std::string& LTrim(std::string & s, char x) {
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
|
||||
return s;
|
||||
s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::bind2nd(std::equal_to<char>(), x))));
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string& RTrim(std::string & s, char x) {
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
|
||||
return s;
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::bind2nd(std::equal_to<char>(), x))).base(), s.end());
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string& Trim(std::string &s, char x) {
|
||||
return LTrim(RTrim(s, x), x);
|
||||
return LTrim(RTrim(s, x), x);
|
||||
}
|
||||
|
||||
inline void Split(const string& src, vector<string>& res, const string& pattern, size_t maxsplit = string::npos) {
|
||||
res.clear();
|
||||
size_t Start = 0;
|
||||
size_t end = 0;
|
||||
string sub;
|
||||
while(Start < src.size()) {
|
||||
end = src.find_first_of(pattern, Start);
|
||||
if(string::npos == end || res.size() >= maxsplit) {
|
||||
sub = src.substr(Start);
|
||||
res.push_back(sub);
|
||||
return;
|
||||
}
|
||||
sub = src.substr(Start, end - Start);
|
||||
res.push_back(sub);
|
||||
Start = end + 1;
|
||||
res.clear();
|
||||
size_t Start = 0;
|
||||
size_t end = 0;
|
||||
string sub;
|
||||
while(Start < src.size()) {
|
||||
end = src.find_first_of(pattern, Start);
|
||||
if(string::npos == end || res.size() >= maxsplit) {
|
||||
sub = src.substr(Start);
|
||||
res.push_back(sub);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
sub = src.substr(Start, end - Start);
|
||||
res.push_back(sub);
|
||||
Start = end + 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
|
||||
vector<string> res;
|
||||
Split(src, res, pattern, maxsplit);
|
||||
return res;
|
||||
vector<string> res;
|
||||
Split(src, res, pattern, maxsplit);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline bool StartsWith(const string& str, const string& prefix) {
|
||||
if(prefix.length() > str.length()) {
|
||||
return false;
|
||||
}
|
||||
return 0 == str.compare(0, prefix.length(), prefix);
|
||||
if(prefix.length() > str.length()) {
|
||||
return false;
|
||||
}
|
||||
return 0 == str.compare(0, prefix.length(), prefix);
|
||||
}
|
||||
|
||||
inline bool EndsWith(const string& str, const string& suffix) {
|
||||
if(suffix.length() > str.length()) {
|
||||
return false;
|
||||
}
|
||||
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
|
||||
if(suffix.length() > str.length()) {
|
||||
return false;
|
||||
}
|
||||
return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
|
||||
}
|
||||
|
||||
inline bool IsInStr(const string& str, char ch) {
|
||||
return str.find(ch) != string::npos;
|
||||
return str.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
inline uint16_t TwocharToUint16(char high, char low) {
|
||||
return (((uint16_t(high) & 0x00ff) << 8) | (uint16_t(low) & 0x00ff));
|
||||
return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
|
||||
}
|
||||
|
||||
template <class Uint16Container>
|
||||
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
|
||||
if(!str) {
|
||||
return false;
|
||||
if(!str) {
|
||||
return false;
|
||||
}
|
||||
char ch1, ch2;
|
||||
uint16_t tmp;
|
||||
vec.clear();
|
||||
for(size_t i = 0; i < len;) {
|
||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||
vec.push_back(str[i]);
|
||||
i++;
|
||||
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
|
||||
ch1 = (str[i] >> 2) & 0x07;
|
||||
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
||||
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 2;
|
||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
|
||||
ch1 = ((uint8_t)str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
|
||||
ch2 = (((uint8_t)str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
||||
tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 3;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
char ch1, ch2;
|
||||
uint16_t tmp;
|
||||
vec.clear();
|
||||
for(size_t i = 0; i < len;) {
|
||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||
vec.push_back(str[i]);
|
||||
i++;
|
||||
} else if((uint8_t)str[i] <= 0xdf && i + 1 < len) { // 110xxxxxx
|
||||
ch1 = (str[i] >> 2) & 0x07;
|
||||
ch2 = (str[i + 1] & 0x3f) | ((str[i] & 0x03) << 6);
|
||||
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 2;
|
||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < len) {
|
||||
ch1 = ((uint8_t)str[i] << 4) | ((str[i + 1] >> 2) & 0x0f);
|
||||
ch2 = (((uint8_t)str[i + 1] << 6) & 0xc0) | (str[i + 2] & 0x3f);
|
||||
tmp = (((uint16_t(ch1) & 0x00ff) << 8) | (uint16_t(ch2) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 3;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint16Container>
|
||||
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
|
||||
return Utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||
return Utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||
}
|
||||
|
||||
template <class Uint32Container>
|
||||
bool Utf8ToUnicode32(const char * str, size_t size, Uint32Container& vec) {
|
||||
uint32_t tmp;
|
||||
vec.clear();
|
||||
for(size_t i = 0; i < size;) {
|
||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||
// 7bit, total 7bit
|
||||
tmp = (uint8_t)(str[i]) & 0x7f;
|
||||
i++;
|
||||
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < size) { // 110xxxxxx
|
||||
// 5bit, total 5bit
|
||||
tmp = (uint8_t)(str[i]) & 0x1f;
|
||||
|
||||
// 6bit, total 11bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
i += 2;
|
||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < size) { // 1110xxxxxx
|
||||
// 4bit, total 4bit
|
||||
tmp = (uint8_t)(str[i]) & 0x0f;
|
||||
|
||||
// 6bit, total 10bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 16bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||
|
||||
i += 3;
|
||||
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < size) { // 11110xxxx
|
||||
// 3bit, total 3bit
|
||||
tmp = (uint8_t)(str[i]) & 0x07;
|
||||
|
||||
// 6bit, total 9bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 15bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||
|
||||
// 6bit, total 21bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+3]) & 0x3f;
|
||||
|
||||
i += 4;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
vec.push_back(tmp);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint32Container>
|
||||
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
|
||||
uint32_t tmp;
|
||||
vec.clear();
|
||||
for(size_t i = 0; i < str.size();) {
|
||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||
// 7bit, total 7bit
|
||||
tmp = (uint8_t)(str[i]) & 0x7f;
|
||||
i++;
|
||||
} else if((uint8_t)str[i] <= 0xdf && i + 1 < str.size()) { // 110xxxxxx
|
||||
// 5bit, total 5bit
|
||||
tmp = (uint8_t)(str[i]) & 0x1f;
|
||||
return Utf8ToUnicode32(str.data(), str.size(), vec);
|
||||
}
|
||||
|
||||
// 6bit, total 11bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
||||
i += 2;
|
||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < str.size()) { // 1110xxxxxx
|
||||
// 4bit, total 4bit
|
||||
tmp = (uint8_t)(str[i]) & 0x0f;
|
||||
|
||||
// 6bit, total 10bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
||||
|
||||
// 6bit, total 16bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
|
||||
|
||||
i += 3;
|
||||
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < str.size()) { // 11110xxxx
|
||||
// 3bit, total 3bit
|
||||
tmp = (uint8_t)(str[i]) & 0x07;
|
||||
|
||||
// 6bit, total 9bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 1]) & 0x3f;
|
||||
|
||||
// 6bit, total 15bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 2]) & 0x3f;
|
||||
|
||||
// 6bit, total 21bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i + 3]) & 0x3f;
|
||||
|
||||
i += 4;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
vec.push_back(tmp);
|
||||
inline int UnicodeToUtf8Bytes(uint32_t ui){
|
||||
if(ui <= 0x7f) {
|
||||
return 1;
|
||||
} else if(ui <= 0x7ff) {
|
||||
return 2;
|
||||
} else if(ui <= 0xffff) {
|
||||
return 3;
|
||||
} else {
|
||||
return 4;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint32ContainerConIter>
|
||||
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
|
||||
res.clear();
|
||||
uint32_t ui;
|
||||
while(begin != end) {
|
||||
ui = *begin;
|
||||
if(ui <= 0x7f) {
|
||||
res += char(ui);
|
||||
} else if(ui <= 0x7ff) {
|
||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else if(ui <= 0xffff) {
|
||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else {
|
||||
res += char(((ui >> 18) & 0x03) | 0xf0);
|
||||
res += char(((ui >> 12) & 0x3f) | 0x80);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
}
|
||||
begin ++;
|
||||
res.clear();
|
||||
uint32_t ui;
|
||||
while(begin != end) {
|
||||
ui = *begin;
|
||||
if(ui <= 0x7f) {
|
||||
res += char(ui);
|
||||
} else if(ui <= 0x7ff) {
|
||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else if(ui <= 0xffff) {
|
||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else {
|
||||
res += char(((ui >> 18) & 0x03) | 0xf0);
|
||||
res += char(((ui >> 12) & 0x3f) | 0x80);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
}
|
||||
begin ++;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Uint16ContainerConIter>
|
||||
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
||||
res.clear();
|
||||
uint16_t ui;
|
||||
while(begin != end) {
|
||||
ui = *begin;
|
||||
if(ui <= 0x7f) {
|
||||
res += char(ui);
|
||||
} else if(ui <= 0x7ff) {
|
||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else {
|
||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
}
|
||||
begin ++;
|
||||
res.clear();
|
||||
uint16_t ui;
|
||||
while(begin != end) {
|
||||
ui = *begin;
|
||||
if(ui <= 0x7f) {
|
||||
res += char(ui);
|
||||
} else if(ui <= 0x7ff) {
|
||||
res += char(((ui>>6) & 0x1f) | 0xc0);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else {
|
||||
res += char(((ui >> 12) & 0x0f )| 0xe0);
|
||||
res += char(((ui>>6) & 0x3f )| 0x80 );
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
}
|
||||
begin ++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Uint16Container>
|
||||
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
|
||||
vec.clear();
|
||||
if(!str) {
|
||||
return true;
|
||||
}
|
||||
size_t i = 0;
|
||||
while(i < len) {
|
||||
if(0 == (str[i] & 0x80)) {
|
||||
vec.push_back(uint16_t(str[i]));
|
||||
i++;
|
||||
} else {
|
||||
if(i + 1 < len) { //&& (str[i+1] & 0x80))
|
||||
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff) << 8) | (uint16_t(str[i + 1]) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 2;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
vec.clear();
|
||||
if(!str) {
|
||||
return true;
|
||||
}
|
||||
size_t i = 0;
|
||||
while(i < len) {
|
||||
if(0 == (str[i] & 0x80)) {
|
||||
vec.push_back(uint16_t(str[i]));
|
||||
i++;
|
||||
} else {
|
||||
if(i + 1 < len) { //&& (str[i+1] & 0x80))
|
||||
uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff));
|
||||
vec.push_back(tmp);
|
||||
i += 2;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint16Container>
|
||||
bool GBKTrans(const string& str, Uint16Container& vec) {
|
||||
return GBKTrans(str.c_str(), str.size(), vec);
|
||||
return GBKTrans(str.c_str(), str.size(), vec);
|
||||
}
|
||||
|
||||
template <class Uint16ContainerConIter>
|
||||
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
||||
res.clear();
|
||||
//pair<char, char> pa;
|
||||
char first, second;
|
||||
while(begin != end) {
|
||||
//pa = uint16ToChar2(*begin);
|
||||
first = ((*begin) >> 8) & 0x00ff;
|
||||
second = (*begin) & 0x00ff;
|
||||
if(first & 0x80) {
|
||||
res += first;
|
||||
res += second;
|
||||
} else {
|
||||
res += second;
|
||||
}
|
||||
begin++;
|
||||
res.clear();
|
||||
//pair<char, char> pa;
|
||||
char first, second;
|
||||
while(begin != end) {
|
||||
//pa = uint16ToChar2(*begin);
|
||||
first = ((*begin)>>8) & 0x00ff;
|
||||
second = (*begin) & 0x00ff;
|
||||
if(first & 0x80) {
|
||||
res += first;
|
||||
res += second;
|
||||
} else {
|
||||
res += second;
|
||||
}
|
||||
begin++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* format example: "%Y-%m-%d %H:%M:%S"
|
||||
*/
|
||||
inline void GetTime(const string& format, string& timeStr) {
|
||||
time_t timeNow;
|
||||
time(&timeNow);
|
||||
timeStr.resize(64);
|
||||
size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
|
||||
timeStr.resize(len);
|
||||
}
|
||||
// inline void GetTime(const string& format, string& timeStr) {
|
||||
// time_t timeNow;
|
||||
// time(&timeNow);
|
||||
// timeStr.resize(64);
|
||||
// size_t len = strftime((char*)timeStr.c_str(), timeStr.size(), format.c_str(), localtime(&timeNow));
|
||||
// timeStr.resize(len);
|
||||
// }
|
||||
|
||||
inline string PathJoin(const string& path1, const string& path2) {
|
||||
if(EndsWith(path1, "/")) {
|
||||
return path1 + path2;
|
||||
}
|
||||
return path1 + "/" + path2;
|
||||
if(EndsWith(path1, "/")) {
|
||||
return path1 + path2;
|
||||
}
|
||||
return path1 + "/" + path2;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_THREAD_HPP
|
||||
#define LIMONP_THREAD_HPP
|
||||
|
||||
|
@ -25,36 +7,36 @@
|
|||
namespace limonp {
|
||||
|
||||
class IThread: NonCopyable {
|
||||
public:
|
||||
IThread(): isStarted(false), isJoined(false) {
|
||||
public:
|
||||
IThread(): isStarted(false), isJoined(false) {
|
||||
}
|
||||
virtual ~IThread() {
|
||||
if(isStarted && !isJoined) {
|
||||
XCHECK(!pthread_detach(thread_));
|
||||
}
|
||||
virtual ~IThread() {
|
||||
if(isStarted && !isJoined) {
|
||||
XCHECK(!pthread_detach(thread_));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
virtual void Run() = 0;
|
||||
void Start() {
|
||||
XCHECK(!isStarted);
|
||||
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
||||
isStarted = true;
|
||||
}
|
||||
void Join() {
|
||||
XCHECK(!isJoined);
|
||||
XCHECK(!pthread_join(thread_, NULL));
|
||||
isJoined = true;
|
||||
}
|
||||
private:
|
||||
static void * Worker(void * data) {
|
||||
IThread * ptr = (IThread*) data;
|
||||
ptr->Run();
|
||||
return NULL;
|
||||
}
|
||||
virtual void Run() = 0;
|
||||
void Start() {
|
||||
XCHECK(!isStarted);
|
||||
XCHECK(!pthread_create(&thread_, NULL, Worker, this));
|
||||
isStarted = true;
|
||||
}
|
||||
void Join() {
|
||||
XCHECK(!isJoined);
|
||||
XCHECK(!pthread_join(thread_, NULL));
|
||||
isJoined = true;
|
||||
}
|
||||
private:
|
||||
static void * Worker(void * data) {
|
||||
IThread * ptr = (IThread* ) data;
|
||||
ptr->Run();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_t thread_;
|
||||
bool isStarted;
|
||||
bool isJoined;
|
||||
pthread_t thread_;
|
||||
bool isStarted;
|
||||
bool isJoined;
|
||||
}; // class IThread
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -1,21 +1,3 @@
|
|||
/*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
*/
|
||||
#ifndef LIMONP_THREAD_POOL_HPP
|
||||
#define LIMONP_THREAD_POOL_HPP
|
||||
|
||||
|
@ -30,73 +12,73 @@ using namespace std;
|
|||
|
||||
//class ThreadPool;
|
||||
class ThreadPool: NonCopyable {
|
||||
public:
|
||||
class Worker: public IThread {
|
||||
public:
|
||||
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
||||
assert(ptThreadPool_);
|
||||
}
|
||||
virtual ~Worker() {
|
||||
}
|
||||
|
||||
virtual void Run() {
|
||||
while(true) {
|
||||
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
||||
if(closure == NULL) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
closure->Run();
|
||||
} catch(std::exception& e) {
|
||||
XLOG(ERROR) << e.what();
|
||||
} catch(...) {
|
||||
XLOG(ERROR) << " unknown exception.";
|
||||
}
|
||||
delete closure;
|
||||
}
|
||||
}
|
||||
private:
|
||||
ThreadPool * ptThreadPool_;
|
||||
}; // class Worker
|
||||
|
||||
ThreadPool(size_t thread_num)
|
||||
: threads_(thread_num),
|
||||
queue_(thread_num) {
|
||||
assert(thread_num);
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
threads_[i] = new Worker(this);
|
||||
}
|
||||
public:
|
||||
class Worker: public IThread {
|
||||
public:
|
||||
Worker(ThreadPool* pool): ptThreadPool_(pool) {
|
||||
assert(ptThreadPool_);
|
||||
}
|
||||
~ThreadPool() {
|
||||
Stop();
|
||||
virtual ~Worker() {
|
||||
}
|
||||
|
||||
void Start() {
|
||||
for(size_t i = 0; i < threads_.size(); i++) {
|
||||
threads_[i]->Start();
|
||||
virtual void Run() {
|
||||
while (true) {
|
||||
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
|
||||
if (closure == NULL) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
void Stop() {
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
queue_.Push(NULL);
|
||||
try {
|
||||
closure->Run();
|
||||
} catch(std::exception& e) {
|
||||
XLOG(ERROR) << e.what();
|
||||
} catch(...) {
|
||||
XLOG(ERROR) << " unknown exception.";
|
||||
}
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
threads_[i]->Join();
|
||||
delete threads_[i];
|
||||
}
|
||||
threads_.clear();
|
||||
delete closure;
|
||||
}
|
||||
}
|
||||
private:
|
||||
ThreadPool * ptThreadPool_;
|
||||
}; // class Worker
|
||||
|
||||
void Add(ClosureInterface* task) {
|
||||
assert(task);
|
||||
queue_.Push(task);
|
||||
ThreadPool(size_t thread_num)
|
||||
: threads_(thread_num),
|
||||
queue_(thread_num) {
|
||||
assert(thread_num);
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
threads_[i] = new Worker(this);
|
||||
}
|
||||
}
|
||||
~ThreadPool() {
|
||||
Stop();
|
||||
}
|
||||
|
||||
private:
|
||||
friend class Worker;
|
||||
void Start() {
|
||||
for(size_t i = 0; i < threads_.size(); i++) {
|
||||
threads_[i]->Start();
|
||||
}
|
||||
}
|
||||
void Stop() {
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
queue_.Push(NULL);
|
||||
}
|
||||
for(size_t i = 0; i < threads_.size(); i ++) {
|
||||
threads_[i]->Join();
|
||||
delete threads_[i];
|
||||
}
|
||||
threads_.clear();
|
||||
}
|
||||
|
||||
vector<IThread*> threads_;
|
||||
BoundedBlockingQueue<ClosureInterface*> queue_;
|
||||
void Add(ClosureInterface* task) {
|
||||
assert(task);
|
||||
queue_.Push(task);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class Worker;
|
||||
|
||||
vector<IThread*> threads_;
|
||||
BoundedBlockingQueue<ClosureInterface*> queue_;
|
||||
}; // class ThreadPool
|
||||
|
||||
} // namespace limonp
|
||||
|
|
|
@ -46,6 +46,7 @@ AppMatch::AppMatch(QObject *parent) : QThread(parent)
|
|||
if(!m_interFace->isValid()) {
|
||||
qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
|
||||
}
|
||||
m_interFace->setTimeout(200);
|
||||
qDebug() << "AppMatch is new";
|
||||
}
|
||||
|
||||
|
@ -102,12 +103,12 @@ void AppMatch::getAllDesktopFilePath(QString path) {
|
|||
} else {
|
||||
//过滤LXQt、KDE
|
||||
QString filePathStr = fileInfo.filePath();
|
||||
if(filePathStr.contains("KDE", Qt::CaseInsensitive) ||
|
||||
// if(filePathStr.contains("KDE", Qt::CaseInsensitive) ||
|
||||
// filePathStr.contains("mate",Qt::CaseInsensitive)||
|
||||
filePathStr.contains("LX", Qt::CaseInsensitive)) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
// filePathStr.contains("LX", Qt::CaseInsensitive)) {
|
||||
// i++;
|
||||
// continue;
|
||||
// }
|
||||
//过滤后缀不是.desktop的文件
|
||||
if(!filePathStr.endsWith(".desktop")) {
|
||||
i++;
|
||||
|
@ -277,10 +278,10 @@ void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &inst
|
|||
}
|
||||
|
||||
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
|
||||
if(m_interFace->timeout() != -1) {
|
||||
qWarning() << "softWareCente Dbus is timeout !";
|
||||
return;
|
||||
}
|
||||
// if(m_interFace->timeout() != -1) {
|
||||
// qWarning() << "softWareCente Dbus is timeout !";
|
||||
// return;
|
||||
// }
|
||||
slotDBusCallFinished(softwarereturn);
|
||||
qDebug() << "softWareCenter match app is successful!";
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
*
|
||||
*/
|
||||
#include "file-utils.h"
|
||||
#include <QXmlStreamReader>
|
||||
|
||||
using namespace Zeeker;
|
||||
size_t FileUtils::_max_index_count = 0;
|
||||
|
@ -489,6 +490,22 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
|||
|
||||
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
||||
|
||||
QXmlStreamReader reader(&fileR);
|
||||
|
||||
while (!reader.atEnd()){
|
||||
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fileR.close();
|
||||
file.close();
|
||||
return;
|
||||
|
||||
/* //原加载DOM文档方式;
|
||||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
|
@ -513,6 +530,7 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
|
|||
}
|
||||
file.close();
|
||||
return;
|
||||
*/
|
||||
}
|
||||
|
||||
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
||||
|
@ -530,6 +548,31 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
|||
}
|
||||
if(fileList.isEmpty())
|
||||
return;
|
||||
|
||||
for(int i = 0; i < fileList.size(); ++i){
|
||||
QString name = prefix + QString::number(i + 1) + ".xml";
|
||||
if(!file.setCurrentFile(name)) {
|
||||
continue;
|
||||
}
|
||||
QuaZipFile fileR(&file);
|
||||
fileR.open(QIODevice::ReadOnly);
|
||||
|
||||
QXmlStreamReader reader(&fileR);
|
||||
|
||||
while (!reader.atEnd()){
|
||||
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
fileR.close();
|
||||
}
|
||||
file.close();
|
||||
return;
|
||||
|
||||
/*
|
||||
QDomElement sptree;
|
||||
QDomElement sp;
|
||||
QDomElement txbody;
|
||||
|
@ -597,6 +640,7 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
|
|||
}
|
||||
file.close();
|
||||
return;
|
||||
*/
|
||||
}
|
||||
|
||||
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
||||
|
@ -611,8 +655,24 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
|||
return;
|
||||
QuaZipFile fileR(&file);
|
||||
|
||||
fileR.open(QIODevice::ReadOnly); //读取方式打开
|
||||
fileR.open(QIODevice::ReadOnly);
|
||||
|
||||
QXmlStreamReader reader(&fileR);
|
||||
|
||||
while (!reader.atEnd()){
|
||||
if(reader.readNextStartElement() and reader.name().toString() == "t"){
|
||||
textcontent.append(reader.readElementText().replace("\n", "").replace("\r", " "));
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH/3){
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fileR.close();
|
||||
file.close();
|
||||
return;
|
||||
|
||||
/*
|
||||
QDomDocument doc;
|
||||
doc.setContent(fileR.readAll());
|
||||
fileR.close();
|
||||
|
@ -642,6 +702,7 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
|
|||
}
|
||||
file.close();
|
||||
return;
|
||||
*/
|
||||
}
|
||||
|
||||
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
||||
|
@ -651,7 +712,7 @@ void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
|
|||
const QRectF qf;
|
||||
int pageNum = doc->numPages();
|
||||
for(int i = 0; i < pageNum; ++i) {
|
||||
textcontent.append(doc->page(i)->text(qf).replace("\n", ""));
|
||||
textcontent.append(doc->page(i)->text(qf).replace("\n", "").replace("\r", " "));
|
||||
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
|
||||
break;
|
||||
}
|
||||
|
@ -680,7 +741,7 @@ void FileUtils::getTxtContent(QString &path, QString &textcontent) {
|
|||
stream.setCodec(codec);
|
||||
uchardet_delete(chardet);
|
||||
|
||||
textcontent = stream.readAll().replace("\n", "");
|
||||
textcontent = stream.readAll().replace("\n", "").replace("\r", " ");
|
||||
|
||||
file.close();
|
||||
encodedString.clear();
|
||||
|
|
|
@ -37,6 +37,14 @@ GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent) {
|
|||
// m_settings->setAtomicSyncRequired(false);
|
||||
m_block_dirs_settings = new QSettings(BLOCK_DIRS, QSettings::IniFormat, this);
|
||||
m_block_dirs_settings->setIniCodec(QTextCodec::codecForName("UTF-8"));
|
||||
// m_block_dirs_settings->setValue("These_are_block_dirs_conf_for_ukui_search","0");
|
||||
// m_block_dirs_settings->sync();
|
||||
// m_confWatcher = new QFileSystemWatcher(this);
|
||||
// m_confWatcher->addPath(BLOCK_DIRS);
|
||||
// connect(m_confWatcher, &QFileSystemWatcher::fileChanged, this, [ & ]() {
|
||||
// m_block_dirs_settings->sync();
|
||||
// m_confWatcher->addPath(BLOCK_DIRS);
|
||||
// });
|
||||
|
||||
m_search_record_settings = new QSettings(SEARCH_HISTORY, QSettings::IniFormat, this);
|
||||
m_search_record_settings->setIniCodec(QTextCodec::codecForName("UTF-8"));
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <QMutex>
|
||||
#include <QVector>
|
||||
#include <QDir>
|
||||
#include <QFileSystemWatcher>
|
||||
//#include <QGSettings>
|
||||
//If use pkg_config, it wont build succes,why?????????
|
||||
//My demo can build access yet.
|
||||
|
@ -49,9 +50,13 @@
|
|||
#define PATH_NOT_IN_HOME 2;
|
||||
#define PATH_PARENT_BLOCKED 3;
|
||||
|
||||
#define MAIN_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search.conf"
|
||||
#define BLOCK_DIRS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-block-dirs.conf"
|
||||
#define SEARCH_HISTORY QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-history.conf"
|
||||
//#define MAIN_SETTINGS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search.conf"
|
||||
//#define BLOCK_DIRS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-block-dirs.conf"
|
||||
//#define SEARCH_HISTORY QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-history.conf"
|
||||
|
||||
#define MAIN_SETTINGS "/media/用户保险箱/.ukui-search/ukui-search.conf"
|
||||
#define BLOCK_DIRS "/media/用户保险箱/.ukui-search/ukui-search-block-dirs.conf"
|
||||
#define SEARCH_HISTORY "/media/用户保险箱/.ukui-search/ukui-search-history.conf"
|
||||
//#define CLOUD_HISTORY "history"
|
||||
//#define CLOUD_APPLICATIONS "applications"
|
||||
|
||||
|
@ -102,6 +107,7 @@ private:
|
|||
QSettings *m_search_record_settings;
|
||||
QMap<QString, QVariant> m_cache;
|
||||
QStringList m_history;
|
||||
QFileSystemWatcher *m_confWatcher;
|
||||
|
||||
QMutex m_mutex;
|
||||
// size_t test = 0;
|
||||
|
|
|
@ -110,17 +110,21 @@ void ConstructDocumentForContent::run() {
|
|||
return;
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
|
||||
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
|
||||
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||
|
||||
Document doc;
|
||||
doc.setData(content);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
doc.addTerm(upTerm);
|
||||
doc.addValue(m_path);
|
||||
for(int i = 0; i < term.size(); ++i) {
|
||||
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
|
||||
|
||||
//'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
|
||||
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
||||
|
||||
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||
//修改函数返回类型,修改入参为std::string引用--jxx20210519
|
||||
std::vector<cppjieba::KeywordExtractor::Word> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
|
||||
|
||||
for(size_t i = 0; i < term.size(); ++i) {
|
||||
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
|
||||
}
|
||||
|
||||
Zeeker::_mutex_doc_list_content.lock();
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "document.h"
|
||||
#include <QDebug>
|
||||
using namespace Zeeker;
|
||||
void Document::setData(QString data) {
|
||||
void Document::setData(QString &data) {
|
||||
if(data.isEmpty())
|
||||
return;
|
||||
m_document.set_data(data.toStdString());
|
||||
|
@ -37,6 +37,17 @@ void Document::addPosting(std::string term, QVector<size_t> offset, int weight)
|
|||
}
|
||||
}
|
||||
|
||||
void Document::addPosting(std::string term, std::vector<size_t> offset, int weight) {
|
||||
if(term == "")
|
||||
return;
|
||||
if(term.length() > 240)
|
||||
term = QString::fromStdString(term).left(30).toStdString();
|
||||
|
||||
for(size_t i : offset) {
|
||||
m_document.add_posting(term, i, weight);
|
||||
}
|
||||
}
|
||||
|
||||
void Document::addPosting(std::string term, unsigned int offset, int weight) {
|
||||
if(term == "")
|
||||
return;
|
||||
|
|
|
@ -39,8 +39,9 @@ public:
|
|||
m_index_text = other.m_index_text;
|
||||
m_unique_term = other.m_unique_term;
|
||||
}
|
||||
void setData(QString data);
|
||||
void setData(QString &data);
|
||||
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
|
||||
void addPosting(std::string term, std::vector<size_t> offset, int weight = 1);
|
||||
void addPosting(std::string term, unsigned int offset, int weight = 1);
|
||||
void addTerm(QString term);
|
||||
void addValue(QString value);
|
||||
|
|
|
@ -31,8 +31,9 @@ void FileReader::getTextContent(QString path, QString &textContent) {
|
|||
QFileInfo file(path);
|
||||
QString strsfx = file.suffix();
|
||||
if(name == "application/zip") {
|
||||
if(strsfx.endsWith("docx"))
|
||||
if(strsfx.endsWith("docx")){
|
||||
FileUtils::getDocxTextContent(path, textContent);
|
||||
}
|
||||
if(strsfx.endsWith("pptx"))
|
||||
FileUtils::getPptxTextContent(path, textContent);
|
||||
if(strsfx.endsWith("xlsx"))
|
||||
|
|
|
@ -46,7 +46,54 @@ void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
|
|||
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
|
||||
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
|
||||
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
|
||||
this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
||||
//this->q_content_index->enqueue(fileInfo.absoluteFilePath());
|
||||
if(fileInfo.fileName().split(".").last() == "docx"){
|
||||
QuaZip file(fileInfo.absoluteFilePath());
|
||||
if(!file.open(QuaZip::mdUnzip))
|
||||
return;
|
||||
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
|
||||
return;
|
||||
QuaZipFile fileR(&file);
|
||||
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//docx解压缩后的xml文件为实际需要解析文件大小
|
||||
qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
|
||||
qDebug() << "文件大小:" << fileR.usize();
|
||||
file.close();
|
||||
}else if(fileInfo.fileName().split(".").last() == "pptx"){
|
||||
QuaZip file(fileInfo.absoluteFilePath());
|
||||
if(!file.open(QuaZip::mdUnzip))
|
||||
return;
|
||||
QString prefix("ppt/slides/slide");
|
||||
qint64 fileSize(0);
|
||||
qint64 fileIndex(0);
|
||||
for(QString i : file.getFileNameList()) {
|
||||
if(i.startsWith(prefix)){
|
||||
QString name = prefix + QString::number(fileIndex + 1) + ".xml";
|
||||
fileIndex++;
|
||||
if(!file.setCurrentFile(name)) {
|
||||
continue;
|
||||
}
|
||||
QuaZipFile fileR(&file);
|
||||
fileSize += fileR.usize();
|
||||
}
|
||||
}
|
||||
file.close();
|
||||
qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
|
||||
qDebug() << "文件大小:" << fileSize;
|
||||
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileSize));//pptx解压缩后的xml文件为实际需要解析文件大小
|
||||
}else if(fileInfo.fileName().split(".").last() == "xlsx"){
|
||||
QuaZip file(fileInfo.absoluteFilePath());
|
||||
if(!file.open(QuaZip::mdUnzip))
|
||||
return;
|
||||
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
|
||||
return;
|
||||
QuaZipFile fileR(&file);
|
||||
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileR.usize()));//xlsx解压缩后的xml文件为实际解析文件大小
|
||||
qDebug() << "文件路径:" <<fileInfo.absoluteFilePath();
|
||||
qDebug() << "文件大小:" << fileR.usize();
|
||||
file.close();
|
||||
}else{
|
||||
this->q_content_index->enqueue(qMakePair(fileInfo.absoluteFilePath(),fileInfo.size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,8 +137,9 @@ void FirstIndex::run() {
|
|||
|
||||
this->q_index = new QQueue<QVector<QString>>();
|
||||
//this->q_content_index = new QQueue<QString>();
|
||||
NEW_QUEUE(this->q_content_index);
|
||||
//NEW_QUEUE(this->q_content_index);
|
||||
// this->mlm = new MessageListManager();
|
||||
this->q_content_index = new QQueue<QPair<QString,qint64>>();
|
||||
|
||||
int fifo_fd;
|
||||
char buffer[2];
|
||||
|
@ -138,7 +186,12 @@ void FirstIndex::run() {
|
|||
// QtConcurrent::run([&](){
|
||||
sem.acquire(1);
|
||||
mutex1.unlock();
|
||||
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||
QStringList pathList;
|
||||
pathList.append("/media/用户保险箱");
|
||||
pathList.append("/media/邮件保险箱");
|
||||
pathList.append("/media/公共保险箱");
|
||||
pathList.append("/media/备份保险箱");
|
||||
this->setPath(pathList);
|
||||
this->Traverse();
|
||||
FileUtils::_max_index_count = this->q_index->length();
|
||||
qDebug() << "max_index_count:" << FileUtils::_max_index_count;
|
||||
|
@ -168,9 +221,14 @@ void FirstIndex::run() {
|
|||
qDebug() << "q_content_index:" << q_content_index->size();
|
||||
while(!this->q_content_index->empty()) {
|
||||
// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
|
||||
for(size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i) {
|
||||
tmp->enqueue(this->q_content_index->dequeue());
|
||||
qint64 fileSize = 0;
|
||||
//修改一次处理的数据量,从30个文件改为文件总大小为50M以下,50M为暂定值--jxx20210519
|
||||
for(size_t i = 0;/* (i < 30) && */(fileSize < 50*1024*1024) && (!this->q_content_index->empty()); ++i) {
|
||||
QPair<QString,qint64> tempPair = this->q_content_index->dequeue();
|
||||
fileSize += tempPair.second;
|
||||
tmp->enqueue(tempPair.first);
|
||||
}
|
||||
// qDebug() << ">>>>>>>>all fileSize:" << fileSize << "file num:" << tmp->size() << "<<<<<<<<<<<<<<<<<<<";
|
||||
this->p_indexGenerator->creatAllIndex(tmp);
|
||||
tmp->clear();
|
||||
}
|
||||
|
|
|
@ -63,7 +63,9 @@ private:
|
|||
|
||||
//test
|
||||
QQueue<QVector<QString>>* q_index;
|
||||
QQueue<QString>* q_content_index;
|
||||
// QQueue<QString>* q_content_index;
|
||||
//修改QQueue存储数据为QPair<QString,qint64>,增加存储文件大小数据便于处理时统计--jxx20210519
|
||||
QQueue<QPair<QString,qint64>>* q_content_index;
|
||||
|
||||
const QMap<QString, bool> targetFileTypeMap = {
|
||||
std::map<QString, bool>::value_type("doc", true),
|
||||
|
|
|
@ -32,8 +32,10 @@
|
|||
#include <QStandardPaths>
|
||||
|
||||
|
||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||
//#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||
//#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||
#define INDEX_PATH "/media/用户保险箱/.ukui-search/index_data"
|
||||
#define CONTENT_INDEX_PATH "/media/用户保险箱/.ukui-search/content_index_data"
|
||||
|
||||
using namespace Zeeker;
|
||||
|
||||
|
@ -354,7 +356,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
|
|||
QString upTerm;
|
||||
FileReader::getTextContent(path, content);
|
||||
|
||||
term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
|
||||
// QStringList term = content.split("");
|
||||
|
||||
doc.setData(content);
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
|
||||
#define INDEX_DATABASE_STATE "index_database_state"
|
||||
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
|
||||
#define INDEX_STATUS QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf"
|
||||
#define INDEX_STATUS "/media/用户保险箱/.ukui-search/ukui-search-index-status.conf"
|
||||
namespace Zeeker {
|
||||
//fixme: we need a better way to record index status.
|
||||
class IndexStatusRecorder : public QObject
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
QFileInfo fi(tmp); \
|
||||
if(!fi.isSymLink()){ \
|
||||
AddWatch(tmp); \
|
||||
setPath(tmp); \
|
||||
setPath(QStringList(tmp)); \
|
||||
Traverse(); \
|
||||
}
|
||||
|
||||
|
@ -46,7 +46,7 @@
|
|||
CREATE_FILE_NAME_INDEX \
|
||||
CREATE_FILE_CONTENT_INDEX
|
||||
using namespace Zeeker;
|
||||
InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path) {
|
||||
InotifyIndex::InotifyIndex(const QStringList &pathList) : Traverse_BFS(pathList) {
|
||||
qDebug() << "setInotifyMaxUserWatches start";
|
||||
UkuiSearchQDBus usQDBus;
|
||||
usQDBus.setInotifyMaxUserWatches();
|
||||
|
@ -61,18 +61,21 @@ InotifyIndex::~InotifyIndex() {
|
|||
|
||||
void InotifyIndex::firstTraverse() {
|
||||
QQueue<QString> bfs;
|
||||
bfs.enqueue(this->path);
|
||||
QFileInfoList list;
|
||||
QDir dir;
|
||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||
dir.setSorting(QDir::DirsFirst);
|
||||
while(!bfs.empty()) {
|
||||
dir.setPath(bfs.dequeue());
|
||||
list = dir.entryInfoList();
|
||||
for(auto i : list) {
|
||||
if(i.isDir() && (!(i.isSymLink()))) {
|
||||
this->AddWatch(i.absoluteFilePath());
|
||||
bfs.enqueue(i.absoluteFilePath());
|
||||
for(QString path : this->m_pathList) {
|
||||
this->AddWatch(path);
|
||||
bfs.enqueue(path);
|
||||
QFileInfoList list;
|
||||
QDir dir;
|
||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||
dir.setSorting(QDir::DirsFirst);
|
||||
while(!bfs.empty()) {
|
||||
dir.setPath(bfs.dequeue());
|
||||
list = dir.entryInfoList();
|
||||
for(auto i : list) {
|
||||
if(i.isDir() && (!(i.isSymLink()))) {
|
||||
this->AddWatch(i.absoluteFilePath());
|
||||
bfs.enqueue(i.absoluteFilePath());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -248,8 +251,8 @@ void InotifyIndex::run() {
|
|||
m_fd = inotify_init();
|
||||
qDebug() << "m_fd----------->" << m_fd;
|
||||
|
||||
this->AddWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||
// this->AddWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||
// this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
|
||||
this->firstTraverse();
|
||||
|
||||
int fifo_fd;
|
||||
|
|
|
@ -40,13 +40,13 @@ static InotifyIndex* global_instance_of_index = nullptr;
|
|||
class InotifyIndex : public QThread, public Traverse_BFS {
|
||||
Q_OBJECT
|
||||
public:
|
||||
static InotifyIndex* getInstance(const QString& path) {
|
||||
static InotifyIndex* getInstance(const QStringList &pathList) {
|
||||
if(!global_instance_of_index) {
|
||||
global_instance_of_index = new InotifyIndex(path);
|
||||
global_instance_of_index = new InotifyIndex(pathList);
|
||||
}
|
||||
return global_instance_of_index;
|
||||
}
|
||||
InotifyIndex(const QString&);
|
||||
InotifyIndex(const QStringList &pathList);
|
||||
~InotifyIndex();
|
||||
|
||||
bool AddWatch(const QString&);
|
||||
|
|
|
@ -27,7 +27,7 @@ QMutex SearchManager::m_mutex1;
|
|||
QMutex SearchManager::m_mutex2;
|
||||
QMutex SearchManager::m_mutex3;
|
||||
SearchManager::SearchManager(QObject *parent) : QObject(parent) {
|
||||
m_pool.setMaxThreadCount(2);
|
||||
m_pool.setMaxThreadCount(3);
|
||||
m_pool.setExpiryTimeout(1000);
|
||||
}
|
||||
|
||||
|
@ -301,36 +301,22 @@ int FileContentSearch::keywordSearchContent() {
|
|||
ret.erase(ret.begin(), ret.end());
|
||||
::friso::ResultMap().swap(ret);
|
||||
*/
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
|
||||
//Creat a query
|
||||
std::string words;
|
||||
for(int i = 0; i < sKeyWord.size(); i++) {
|
||||
words.append(sKeyWord.at(i).word).append(" ");
|
||||
}
|
||||
|
||||
Xapian::Query query = qp.parse_query(words);
|
||||
// Xapian::Query query = qp.parse_query(keyword.toStdString());
|
||||
|
||||
|
||||
|
||||
// QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
|
||||
// //Creat a query
|
||||
// std::string words;
|
||||
// for(int i=0;i<sKeyWord.size();i++)
|
||||
// {
|
||||
// words.append(sKeyWord.at(i).word).append(" ");
|
||||
// }
|
||||
|
||||
|
||||
// Xapian::Query query = qp.parse_query(words);
|
||||
|
||||
// std::vector<Xapian::Query> v;
|
||||
// for(int i=0;i<sKeyWord.size();i++)
|
||||
// {
|
||||
// v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
||||
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
|
||||
// }
|
||||
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
||||
std::vector<Xapian::Query> v;
|
||||
for(int i=0; i<sKeyWord.size(); i++) {
|
||||
v.push_back(Xapian::Query(sKeyWord.at(i).word));
|
||||
qDebug() << QString::fromStdString(sKeyWord.at(i).word);
|
||||
}
|
||||
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
|
||||
|
||||
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
|
||||
|
||||
enquire.set_query(query);
|
||||
|
@ -645,7 +631,7 @@ int FileContentSearchV4::keywordSearchContent()
|
|||
ret.erase(ret.begin(), ret.end());
|
||||
::friso::ResultMap().swap(ret);
|
||||
*/
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
|
||||
//Creat a query
|
||||
std::string words;
|
||||
for(int i=0;i<sKeyWord.size();i++)
|
||||
|
@ -803,6 +789,7 @@ void DirectSearch::run() {
|
|||
// QDir::Hidden
|
||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||
dir.setSorting(QDir::DirsFirst);
|
||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||
while(!bfs.empty()) {
|
||||
dir.setPath(bfs.dequeue());
|
||||
list = dir.entryInfoList();
|
||||
|
@ -810,8 +797,6 @@ void DirectSearch::run() {
|
|||
if (i.isDir() && (!(i.isSymLink()))) {
|
||||
|
||||
bool findIndex = false;
|
||||
|
||||
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
|
||||
for (QString j : blockList) {
|
||||
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
|
||||
findIndex = true;
|
||||
|
|
|
@ -44,9 +44,11 @@
|
|||
#include "chinese-segmentation.h"
|
||||
|
||||
|
||||
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||
//#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
|
||||
//#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
|
||||
|
||||
#define INDEX_PATH "/media/用户保险箱/.ukui-search/index_data"
|
||||
#define CONTENT_INDEX_PATH "/media/用户保险箱/.ukui-search/content_index_data"
|
||||
namespace Zeeker {
|
||||
struct SearchResultInfo{
|
||||
std::string filePath;
|
||||
|
|
|
@ -15,7 +15,12 @@ void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
|
|||
qWarning() << "start inotify index";
|
||||
// InotifyIndex ii("/home");
|
||||
// ii.start();
|
||||
this->m_ii = InotifyIndex::getInstance("/home");
|
||||
QStringList pathList;
|
||||
pathList.append("/media/用户保险箱");
|
||||
pathList.append("/media/邮件保险箱");
|
||||
pathList.append("/media/公共保险箱");
|
||||
pathList.append("/media/备份保险箱");
|
||||
this->m_ii = InotifyIndex::getInstance(pathList);
|
||||
if(!this->m_ii->isRunning()) {
|
||||
this->m_ii->start();
|
||||
}
|
||||
|
|
|
@ -19,31 +19,36 @@
|
|||
*/
|
||||
#include "traverse_bfs.h"
|
||||
using namespace Zeeker;
|
||||
Traverse_BFS::Traverse_BFS(const QString& path) {
|
||||
Q_ASSERT('/' == path.at(0));
|
||||
this->path = path;
|
||||
Traverse_BFS::Traverse_BFS(const QStringList& pathList) {
|
||||
for(QString path : pathList) {
|
||||
Q_ASSERT('/' == path.at(0));
|
||||
}
|
||||
|
||||
this->m_pathList = pathList;
|
||||
}
|
||||
|
||||
void Traverse_BFS::Traverse() {
|
||||
QQueue<QString> bfs;
|
||||
bfs.enqueue(this->path);
|
||||
QFileInfoList list;
|
||||
QDir dir;
|
||||
// QDir::Hidden
|
||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||
dir.setSorting(QDir::DirsFirst);
|
||||
while(!bfs.empty()) {
|
||||
dir.setPath(bfs.dequeue());
|
||||
list = dir.entryInfoList();
|
||||
for(auto i : list) {
|
||||
if(i.isDir() && (!(i.isSymLink()))) {
|
||||
bfs.enqueue(i.absoluteFilePath());
|
||||
for(QString path : m_pathList) {
|
||||
bfs.enqueue(path);
|
||||
QFileInfoList list;
|
||||
QDir dir;
|
||||
// QDir::Hidden
|
||||
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
|
||||
dir.setSorting(QDir::DirsFirst);
|
||||
while(!bfs.empty()) {
|
||||
dir.setPath(bfs.dequeue());
|
||||
list = dir.entryInfoList();
|
||||
for(auto i : list) {
|
||||
if(i.isDir() && (!(i.isSymLink()))) {
|
||||
bfs.enqueue(i.absoluteFilePath());
|
||||
}
|
||||
DoSomething(i);
|
||||
}
|
||||
DoSomething(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Traverse_BFS::setPath(const QString& path) {
|
||||
this->path = path;
|
||||
void Traverse_BFS::setPath(const QStringList &pathList) {
|
||||
this->m_pathList = pathList;
|
||||
}
|
||||
|
|
|
@ -31,10 +31,10 @@ public:
|
|||
void Traverse();
|
||||
virtual ~Traverse_BFS() = default;
|
||||
virtual void DoSomething(const QFileInfo&) = 0;
|
||||
void setPath(const QString&);
|
||||
void setPath(const QStringList &pathList);
|
||||
protected:
|
||||
Traverse_BFS(const QString&);
|
||||
QString path = "/home";
|
||||
Traverse_BFS(const QStringList &pathList);
|
||||
QStringList m_pathList;
|
||||
private:
|
||||
Traverse_BFS(const Traverse_BFS&) = delete;
|
||||
void operator=(const Traverse_BFS&) = delete;
|
||||
|
|
|
@ -38,7 +38,7 @@ UkuiSearchQDBus::~UkuiSearchQDBus() {
|
|||
//一键三连
|
||||
void UkuiSearchQDBus::setInotifyMaxUserWatches() {
|
||||
// /proc/sys/fs/inotify/max_user_watches
|
||||
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1");
|
||||
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1");
|
||||
// sysctl
|
||||
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
|
||||
// /etc/sysctl.conf
|
||||
|
|
|
@ -67,7 +67,7 @@ unix {
|
|||
INSTALLS += target
|
||||
|
||||
header.path = /usr/include/ukui-search
|
||||
header.files += *.h index/*.h appsearch/*.h settingsearch/*.h
|
||||
header.files += *.h index/*.h appsearch/*.h settingsearch/*.h plugininterface/*.h
|
||||
INSTALLS += header
|
||||
}
|
||||
|
||||
|
|
|
@ -4457,15 +4457,14 @@ bool bReadBuffer(FILE *pFile, ULONG ulStartBlock,
|
|||
ULONG ulBegin, ulIndex;
|
||||
size_t tLen;
|
||||
|
||||
for(ulIndex = ulStartBlock;
|
||||
ulIndex != END_OF_CHAIN && tToRead != 0;
|
||||
ulIndex = aulBlockDepot[ulIndex]) {
|
||||
for(ulIndex = ulStartBlock;ulIndex != END_OF_CHAIN && tToRead != 0;ulIndex = aulBlockDepot[ulIndex]) {
|
||||
if(ulIndex >= (ULONG)tBlockDepotLen) {
|
||||
if(tBlockSize >= BIG_BLOCK_SIZE) {
|
||||
qWarning() << "The Big Block Depot is damaged";
|
||||
} else {
|
||||
qWarning() << "The Small Block Depot is damaged";
|
||||
}
|
||||
return (tToRead == 0);
|
||||
}
|
||||
if(ulOffset >= (ULONG)tBlockSize) {
|
||||
ulOffset -= tBlockSize;
|
||||
|
@ -4964,7 +4963,7 @@ bool KBinaryParser::read8DocText(FILE *pFile, const ppsInfoType *pPPS,
|
|||
|
||||
if(bUsesUnicode) {
|
||||
ushort* usAucData = (ushort*)ptaucBytes;
|
||||
content.append(QString::fromUtf16(usAucData).replace("\r", ""));
|
||||
content.append(QString::fromUtf16(usAucData).replace("\n", "").replace("\r", " "));
|
||||
usAucData = (ushort*)xfree((void*)usAucData);
|
||||
ptaucBytes = NULL;
|
||||
if(content.length() >= 682666) //20480000/3
|
||||
|
@ -5067,7 +5066,7 @@ int KBinaryParser:: readSSTRecord(readDataParam &rdParam, ppsInfoType PPS_info,
|
|||
} else {
|
||||
ushort* usData = (ushort*)chData;
|
||||
|
||||
content.append(QString::fromUtf16(usData).replace("\r", ""));
|
||||
content.append(QString::fromUtf16(usData).replace("\n", "").replace("\r", " "));
|
||||
usData = (ushort*)xfree((void*)usData);
|
||||
chData = NULL;
|
||||
if(content.length() >= 682666) //20480000/3
|
||||
|
@ -5132,7 +5131,7 @@ ULONG KBinaryParser::readPPtRecord(FILE* pFile, ppsInfoType* PPS_info, ULONG* au
|
|||
return -1;
|
||||
ushort* usData = (ushort*)chData;
|
||||
|
||||
content.append(QString::fromUtf16(usData).replace("\r", ""));
|
||||
content.append(QString::fromUtf16(usData).replace("\n", "").replace("\r", " "));
|
||||
|
||||
usData = (ushort*)xfree((void*)usData);
|
||||
chData = NULL;
|
||||
|
|
|
@ -24,12 +24,13 @@ public:
|
|||
QIcon icon;
|
||||
QString name;
|
||||
QVector<DescriptionInfo> description;
|
||||
QMap<QString,QString> actionMap;//action name and action key
|
||||
QStringList actionList; //all actions, take fist for double click action.
|
||||
QString key;
|
||||
};
|
||||
virtual ~SearchPluginIface() {}
|
||||
virtual QString getPluginName() = 0;
|
||||
virtual void KeywordSearch(QString keyword,QQueue<ResultInfo> *searchResult) = 0;
|
||||
virtual void openAction(QString name, QString key) = 0;
|
||||
virtual void openAction(QString action, QString key) = 0;
|
||||
|
||||
};
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
*/
|
||||
#include "setting-match.h"
|
||||
#include "file-utils.h"
|
||||
#include <QProcessEnvironment>
|
||||
using namespace Zeeker;
|
||||
SettingsMatch::SettingsMatch(QObject *parent) : QObject(parent) {
|
||||
xmlElement();
|
||||
|
@ -44,6 +45,8 @@ QStringList SettingsMatch::startMatchApp(const QString &source) {
|
|||
void SettingsMatch::xmlElement() {
|
||||
QString ChineseIndex;
|
||||
QString EnglishIndex;
|
||||
QString path = QProcessEnvironment::systemEnvironment().value("XDG_SESSION_TYPE");
|
||||
QString version;
|
||||
QFile file(QString::fromLocal8Bit("/usr/share/ukui-control-center/shell/res/search.xml"));
|
||||
if(!file.open(QIODevice::ReadOnly)) {
|
||||
return;
|
||||
|
@ -62,6 +65,12 @@ void SettingsMatch::xmlElement() {
|
|||
QDomNodeList list = element.childNodes();
|
||||
for(int i = 0; i < list.count(); ++i) {
|
||||
QDomNode n = list.at(i);
|
||||
if(n.nodeName()==QString::fromLocal8Bit("Environment")){
|
||||
version=n.toElement().text();
|
||||
if((version=="v101"&&path=="wayland")||(version=="hw990"&&path=="x11")){
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(n.nodeName() == QString::fromLocal8Bit("ChinesePlugin")) {
|
||||
ChineseIndex = n.toElement().text();
|
||||
}
|
||||
|
|
|
@ -52,6 +52,9 @@ ContentWidget::~ContentWidget() {
|
|||
* @brief initUI 初始化homepage和resultpage
|
||||
*/
|
||||
void ContentWidget::initUI() {
|
||||
QPalette pal = palette();
|
||||
pal.setColor(QPalette::Base, QColor(0, 0, 0, 0));
|
||||
pal.setColor(QPalette::Window, QColor(0, 0, 0, 0)); //使用此palette的窗口背景将为透明
|
||||
m_homePage = new QWidget(this);
|
||||
m_homePageLyt = new QVBoxLayout(m_homePage);
|
||||
m_homePageLyt->setSpacing(0);
|
||||
|
@ -79,7 +82,6 @@ void ContentWidget::initUI() {
|
|||
m_detailLyt = new QVBoxLayout(m_resultDetail);
|
||||
m_resultList->setFixedWidth(236);
|
||||
m_resultList->setFixedHeight(0);
|
||||
m_resultList->setStyleSheet("QWidget{background:transparent;}");
|
||||
m_listLyt->setContentsMargins(0, 0, 12, 0);
|
||||
m_listLyt->setSpacing(0);
|
||||
m_resultListArea->setWidget(m_resultList);
|
||||
|
@ -89,10 +91,13 @@ void ContentWidget::initUI() {
|
|||
clearLayout(m_homePageLyt);
|
||||
initHomePage();
|
||||
});
|
||||
connect(m_detailView, &SearchDetailView::actionTriggerd, this, &ContentWidget::effectiveSearch);
|
||||
m_resultDetailArea->setWidget(m_detailView);
|
||||
m_resultDetailArea->setWidgetResizable(true);
|
||||
m_resultListArea->setStyleSheet("QScrollArea{background: transparent;}");
|
||||
m_resultDetailArea->setStyleSheet("QScrollArea{background: transparent; border-radius: 4px;}");
|
||||
m_resultListArea->setFrameShape(QFrame::NoFrame);
|
||||
m_resultDetailArea->setFrameShape(QFrame::NoFrame);
|
||||
m_resultListArea->setPalette(pal);
|
||||
m_resultDetailArea->setPalette(pal);
|
||||
this->addWidget(m_homePage);
|
||||
this->addWidget(m_resultPage);
|
||||
|
||||
|
@ -226,7 +231,7 @@ void ContentWidget::initListView() {
|
|||
this->resetListHeight();
|
||||
});
|
||||
|
||||
connect(qApp, &QApplication::paletteChanged, this, [ = ](const QPalette &pal) {
|
||||
connect(qApp, &QApplication::paletteChanged, this, [ = ]() {
|
||||
m_fileListView->refresh();
|
||||
m_dirListView->refresh();
|
||||
m_contentListView->refresh();
|
||||
|
@ -279,7 +284,7 @@ void ContentWidget::setupConnect(SearchListView * listview) {
|
|||
connect(listview, &SearchListView::currentSelectPos, [ = ](QPoint pos) {
|
||||
m_resultListArea->ensureVisible(pos.x(), pos.y());
|
||||
});
|
||||
connect(listview, &SearchListView::mousePressed, this, &ContentWidget::mousePressed);
|
||||
connect(listview, &SearchListView::mousePressed, this, &ContentWidget::effectiveSearch);
|
||||
connect(listview, &SearchListView::currentRowChanged, this, &ContentWidget::onListViewRowChanged);
|
||||
connect(listview, &SearchListView::onRowDoubleClicked, this, &ContentWidget::onListViewRowDoubleClicked);
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ private:
|
|||
|
||||
Q_SIGNALS:
|
||||
void currentItemChanged();
|
||||
void mousePressed();
|
||||
void effectiveSearch();
|
||||
|
||||
private Q_SLOTS:
|
||||
void clearLayout(QLayout *);
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include <QDebug>
|
||||
#include <QFileInfo>
|
||||
#include <QDir>
|
||||
#define HOMEPAGE_SETTINGS QDir::homePath()+"/.config/org.ukui/ukui-search/ukui-search-homepage.conf"
|
||||
#define HOMEPAGE_SETTINGS "media/用户保险箱/.ukui-search/ukui-search-homepage.conf"
|
||||
namespace Zeeker {
|
||||
class ConfigFile : public QObject {
|
||||
Q_OBJECT
|
||||
|
|
|
@ -53,8 +53,9 @@ void FolderListItem::initUi() {
|
|||
m_iconLabel->setPixmap(QIcon::fromTheme("inode-directory").pixmap(QSize(16, 16)));
|
||||
m_pathLabel->setText(m_path);
|
||||
m_delLabel->setText(tr("Delete the folder out of blacklist"));
|
||||
m_pathLabel->setStyleSheet("QLabel{color: palette(text); background: transparent;}");
|
||||
m_delLabel->setStyleSheet("QLabel{color: #3790FA; background: transparent;}");
|
||||
QPalette pal = palette();
|
||||
pal.setColor(QPalette::WindowText, QColor(55, 144, 250, 255));
|
||||
m_delLabel->setPalette(pal);
|
||||
m_delLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_delLabel->installEventFilter(this);
|
||||
m_delLabel->hide();
|
||||
|
|
|
@ -56,7 +56,6 @@ void HomePageItem::setupUi(const int& type, const QString& path) {
|
|||
m_type = type;
|
||||
m_widget = new QWidget(this);
|
||||
m_widget->setObjectName("MainWidget");
|
||||
// m_widget->setStyleSheet("QWidget#MainWidget{background: rgba(0, 0, 0, 0.05); border-radius: 4px;}");
|
||||
m_widget->installEventFilter(this);
|
||||
m_iconlabel = new QLabel(m_widget);
|
||||
m_namelabel = new QLabel(m_widget);
|
||||
|
|
|
@ -91,48 +91,52 @@ void OptionView::setupOptions(const int& type, bool is_appInstalled) {
|
|||
|
||||
|
||||
void OptionView::initUI() {
|
||||
QPalette pal = palette();
|
||||
pal.setColor(QPalette::WindowText, NORMAL_COLOR);
|
||||
pal.setColor(QPalette::Light, HOVER_COLOR);
|
||||
pal.setColor(QPalette::Dark, PRESS_COLOR);
|
||||
m_optionFrame = new QFrame(this);
|
||||
m_optionLyt = new QVBoxLayout(m_optionFrame);
|
||||
m_optionLyt->setContentsMargins(8, 0, 0, 0);
|
||||
|
||||
m_openLabel = new QLabel(m_optionFrame);
|
||||
m_openLabel->setText(tr("Open")); //打开
|
||||
m_openLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openLabel->setPalette(pal);
|
||||
m_openLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_openLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_openLabel);
|
||||
|
||||
m_shortcutLabel = new QLabel(m_optionFrame);
|
||||
m_shortcutLabel->setText(tr("Add Shortcut to Desktop")); //添加到桌面快捷方式
|
||||
m_shortcutLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_shortcutLabel->setPalette(pal);
|
||||
m_shortcutLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_shortcutLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_shortcutLabel);
|
||||
|
||||
m_panelLabel = new QLabel(m_optionFrame);
|
||||
m_panelLabel->setText(tr("Add Shortcut to Panel")); //添加到任务栏快捷方式
|
||||
m_panelLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_panelLabel->setPalette(pal);
|
||||
m_panelLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_panelLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_panelLabel);
|
||||
|
||||
m_openPathLabel = new QLabel(m_optionFrame);
|
||||
m_openPathLabel->setText(tr("Open path")); //打开所在路径
|
||||
m_openPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openPathLabel->setPalette(pal);
|
||||
m_openPathLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_openPathLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_openPathLabel);
|
||||
|
||||
m_copyPathLabel = new QLabel(m_optionFrame);
|
||||
m_copyPathLabel->setText(tr("Copy path")); //复制所在路径
|
||||
m_copyPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_copyPathLabel->setPalette(pal);
|
||||
m_copyPathLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_copyPathLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_copyPathLabel);
|
||||
|
||||
m_installLabel = new QLabel(m_optionFrame);
|
||||
m_installLabel->setText(tr("Install")); //复制所在路径
|
||||
m_installLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_installLabel->setPalette(pal);
|
||||
m_installLabel->setCursor(QCursor(Qt::PointingHandCursor));
|
||||
m_installLabel->installEventFilter(this);
|
||||
m_optionLyt->addWidget(m_installLabel);
|
||||
|
@ -235,92 +239,92 @@ void OptionView::setupSettingOptions() {
|
|||
bool OptionView::eventFilter(QObject *watched, QEvent *event) {
|
||||
if(m_openLabel && watched == m_openLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_openLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_openLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_openLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::Open);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_openLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_openLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_openLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
} else if(m_shortcutLabel && watched == m_shortcutLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_shortcutLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_shortcutLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_shortcutLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_shortcutLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::Shortcut);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_shortcutLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_shortcutLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_shortcutLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_shortcutLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
} else if(m_panelLabel && watched == m_panelLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_panelLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_panelLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_panelLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_panelLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::Panel);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_panelLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_panelLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_panelLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_panelLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
} else if(m_openPathLabel && watched == m_openPathLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_openPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_openPathLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_openPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openPathLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::OpenPath);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_openPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_openPathLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_openPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_openPathLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
} else if(m_copyPathLabel && watched == m_copyPathLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_copyPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_copyPathLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_copyPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_copyPathLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::CopyPath);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_copyPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_copyPathLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_copyPathLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_copyPathLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
} else if(m_installLabel && watched == m_installLabel) {
|
||||
if(event->type() == QEvent::MouseButtonPress) {
|
||||
m_installLabel->setStyleSheet("QLabel{font-size: 14px; color: #296CD9;}");
|
||||
m_installLabel->setForegroundRole(QPalette::Dark);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::MouseButtonRelease) {
|
||||
m_installLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_installLabel->setForegroundRole(QPalette::WindowText);
|
||||
Q_EMIT this->onOptionClicked(Options::Install);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Enter) {
|
||||
m_installLabel->setStyleSheet("QLabel{font-size: 14px; color: #40A9FB;}");
|
||||
m_installLabel->setForegroundRole(QPalette::Light);
|
||||
return true;
|
||||
} else if(event->type() == QEvent::Leave) {
|
||||
m_installLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_installLabel->setForegroundRole(QPalette::WindowText);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,10 @@
|
|||
#include <QHBoxLayout>
|
||||
#include "search-list-view.h"
|
||||
|
||||
#define NORMAL_COLOR QColor(55, 144, 250, 255)
|
||||
#define HOVER_COLOR QColor(64, 169, 251, 255)
|
||||
#define PRESS_COLOR QColor(41, 108, 217, 255)
|
||||
|
||||
namespace Zeeker {
|
||||
class OptionView : public QWidget {
|
||||
Q_OBJECT
|
||||
|
|
|
@ -217,7 +217,8 @@ void SearchDetailView::setAppWidget(const QString &appname, const QString &path,
|
|||
|
||||
QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
|
||||
QString showname = fontMetrics.elidedText(m_name, Qt::ElideRight, 274); //当字体长度超过215时显示为省略号
|
||||
m_nameLabel->setText(showname);
|
||||
// m_nameLabel->setText(showname);
|
||||
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(showname)));
|
||||
if(QString::compare(showname, m_name)) {
|
||||
m_nameLabel->setToolTip(m_name);
|
||||
}
|
||||
|
@ -350,18 +351,19 @@ void SearchDetailView::setupWidget(const int& type, const QString& path) {
|
|||
QFontMetrics fontMetrics = m_nameLabel->fontMetrics();
|
||||
QString wholeName = FileUtils::getFileName(path);
|
||||
QString name = fontMetrics.elidedText(wholeName, Qt::ElideRight, 274);
|
||||
m_nameLabel->setText(name);
|
||||
// m_nameLabel->setText(name);
|
||||
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(name)));
|
||||
if(QString::compare(name, wholeName)) {
|
||||
m_nameLabel->setToolTip(wholeName);
|
||||
}
|
||||
m_nameLabel->setTextFormat(Qt::PlainText); //显示纯文本
|
||||
m_typeLabel->setText(tr("Document"));
|
||||
break;
|
||||
}
|
||||
case SearchListView::ResType::Setting : {
|
||||
setIcon(path);
|
||||
QString settingType = path.mid(path.indexOf("/") + 1, path.lastIndexOf("/") - path.indexOf("/") - 1); //配置项所属控制面板插件名
|
||||
m_nameLabel->setText(settingType);
|
||||
// m_nameLabel->setText(settingType);
|
||||
m_nameLabel->setText(QString("<h3 style=\"font-weight:normal;\">%1</h3>").arg(escapeHtml(settingType)));
|
||||
m_typeLabel->setText(FileUtils::getSettingName(path));
|
||||
break;
|
||||
}
|
||||
|
@ -404,6 +406,7 @@ void SearchDetailView::execActions(const int& type, const int& option, const QSt
|
|||
default:
|
||||
break;
|
||||
}
|
||||
Q_EMIT this->actionTriggerd();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -462,8 +465,6 @@ void SearchDetailView::initUI() {
|
|||
m_layout = new QVBoxLayout(this);
|
||||
this->setLayout(m_layout);
|
||||
m_layout->setContentsMargins(16, 60, 16, 24);
|
||||
this->setObjectName("detailView");
|
||||
this->setStyleSheet("QWidget#detailView{background:transparent;}");
|
||||
this->setFixedWidth(378);
|
||||
|
||||
//没有网络的时候的提示信息
|
||||
|
@ -491,8 +492,7 @@ void SearchDetailView::initUI() {
|
|||
m_nameLayout = new QHBoxLayout(m_nameFrame);
|
||||
m_nameLabel = new QLabel(m_nameFrame);
|
||||
m_typeLabel = new QLabel(m_nameFrame);
|
||||
m_nameLabel->setStyleSheet("QLabel{font-size: 18px;}");
|
||||
m_typeLabel->setStyleSheet("QLabel{font-size: 14px; color: palette(mid);}");
|
||||
m_typeLabel->setEnabled(false);
|
||||
m_nameFrame->setFixedHeight(48);
|
||||
m_nameLabel->setMaximumWidth(280);
|
||||
m_nameLayout->addWidget(m_nameLabel);
|
||||
|
|
|
@ -105,6 +105,7 @@ private:
|
|||
|
||||
Q_SIGNALS:
|
||||
void configFileChanged();
|
||||
void actionTriggerd();
|
||||
private Q_SLOTS:
|
||||
void execActions(const int&, const int&, const QString&);
|
||||
void refreshIcon();
|
||||
|
|
|
@ -21,12 +21,11 @@
|
|||
#include "search-list-view.h"
|
||||
#include <QDebug>
|
||||
#include <QFileInfo>
|
||||
#include "custom-style.h"
|
||||
|
||||
using namespace Zeeker;
|
||||
SearchListView::SearchListView(QWidget * parent, const QStringList& list, const int& type) : QTreeView(parent) {
|
||||
// CustomStyle * style = new CustomStyle(GlobalSettings::getInstance()->getValue(STYLE_NAME_KEY).toString());
|
||||
this->setStyle(CustomStyle::getStyle());
|
||||
this->setFrameShape(QFrame::NoFrame);
|
||||
this->viewport()->setAutoFillBackground(false);
|
||||
setRootIsDecorated(false);
|
||||
|
||||
this->setVerticalScrollBarPolicy(Qt::ScrollBarAlwaysOff);
|
||||
|
@ -43,7 +42,6 @@ SearchListView::SearchListView(QWidget * parent, const QStringList& list, const
|
|||
// this->setFixedHeight(list.count() * rowheight + 4);
|
||||
this->setAttribute(Qt::WA_TranslucentBackground, true);
|
||||
this->setAutoFillBackground(false);
|
||||
// this->setStyleSheet("QWidget{background:transparent;}");
|
||||
m_styleDelegate = new HighlightItemDelegate(this);
|
||||
// m_styleDelegate->setSearchKeyword(keyword);
|
||||
this->setItemDelegate(m_styleDelegate);
|
||||
|
@ -130,13 +128,6 @@ void SearchListView::clear() {
|
|||
*/
|
||||
void SearchListView::refresh()
|
||||
{
|
||||
QModelIndex index = this->currentIndex();
|
||||
m_model->refresh();
|
||||
if(index.row() >= 0 && index.row() < m_model->length() && m_isSelected) {
|
||||
this->blockSignals(true);
|
||||
this->setCurrentIndex(index);
|
||||
this->blockSignals(false);
|
||||
}
|
||||
rowheight = this->rowHeight(this->model()->index(0, 0, QModelIndex())) + 1;
|
||||
this->setFixedHeight(m_item->getCurrentSize() * rowheight + 4);
|
||||
}
|
||||
|
|
|
@ -47,6 +47,8 @@ bool ShowMoreLabel::getExpanded() {
|
|||
}
|
||||
|
||||
void ShowMoreLabel::initUi() {
|
||||
QPalette pal = palette();
|
||||
pal.setColor(QPalette::WindowText, QColor(55, 144, 250, 255));
|
||||
m_layout = new QHBoxLayout(this);
|
||||
m_layout->setContentsMargins(0, 0, 0, 6);
|
||||
m_textLabel = new QLabel(this);
|
||||
|
@ -58,7 +60,7 @@ void ShowMoreLabel::initUi() {
|
|||
// m_loadingIconLabel->hide();
|
||||
m_layout->setAlignment(Qt::AlignRight);
|
||||
m_layout->addWidget(m_textLabel);
|
||||
m_textLabel->setStyleSheet("QLabel{font-size: 14px; color: #3790FA}");
|
||||
m_textLabel->setPalette(pal);
|
||||
// m_layout->addWidget(m_loadingIconLabel);
|
||||
}
|
||||
|
||||
|
|
|
@ -126,8 +126,6 @@ void CreateIndexAskDialog::initUi() {
|
|||
* @brief CreateIndexAskDialog::paintEvent 绘制窗口背景(默认背景较暗)
|
||||
*/
|
||||
void CreateIndexAskDialog::paintEvent(QPaintEvent *event) {
|
||||
Q_UNUSED(event)
|
||||
|
||||
QPainter p(this);
|
||||
p.setRenderHint(QPainter::Antialiasing);
|
||||
QPainterPath rectPath;
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <QStyleOption>
|
||||
#include <QApplication>
|
||||
#include <QPainter>
|
||||
#include <QPainterPath>
|
||||
|
||||
namespace Zeeker {
|
||||
class CreateIndexAskDialog : public QDialog {
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Authors: zhangjiaping <zhangjiaping@kylinos.cn>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "custom-style.h"
|
||||
|
||||
using namespace Zeeker;
|
||||
static CustomStyle *customstyle_global_instance = nullptr;
|
||||
CustomStyle::CustomStyle(QStyle *style) {
|
||||
|
||||
}
|
||||
CustomStyle *CustomStyle::getStyle()
|
||||
{
|
||||
if (!customstyle_global_instance)
|
||||
customstyle_global_instance = new CustomStyle;
|
||||
return customstyle_global_instance;
|
||||
}
|
||||
|
||||
QSize CustomStyle::sizeFromContents(QStyle::ContentsType type, const QStyleOption *option, const QSize &contentsSize, const QWidget *widget) const {
|
||||
switch(type) {
|
||||
case CT_ItemViewItem: {
|
||||
QSize size(0, GlobalSettings::getInstance()->getValue(FONT_SIZE_KEY).toDouble() * 2);
|
||||
return size;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return QProxyStyle::sizeFromContents(type, option, contentsSize, widget);
|
||||
}
|
||||
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* Copyright (C) 2020, KylinSoft Co., Ltd.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Authors: zhangjiaping <zhangjiaping@kylinos.cn>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CUSTOMSTYLE_H
|
||||
#define CUSTOMSTYLE_H
|
||||
#include <QProxyStyle>
|
||||
#include "global-settings.h"
|
||||
|
||||
namespace Zeeker {
|
||||
class CustomStyle : public QProxyStyle {
|
||||
Q_OBJECT
|
||||
public:
|
||||
static CustomStyle *getStyle();
|
||||
virtual QSize sizeFromContents(QStyle::ContentsType type, const QStyleOption *option, const QSize &contentsSize, const QWidget *widget = nullptr) const;
|
||||
private:
|
||||
explicit CustomStyle(QStyle *style = nullptr);
|
||||
~CustomStyle() override {}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUSTOMSTYLE_H
|
|
@ -98,7 +98,6 @@ void SearchBarHLayout::initUI() {
|
|||
|
||||
m_queryWidget = new QWidget(m_queryLineEdit);
|
||||
m_queryWidget->setFocusPolicy(Qt::NoFocus);
|
||||
m_queryWidget->setStyleSheet("border:0px;background:transparent");
|
||||
|
||||
QHBoxLayout* queryWidLayout = new QHBoxLayout;
|
||||
queryWidLayout->setContentsMargins(8, 4, 0, 0);
|
||||
|
@ -109,13 +108,12 @@ void SearchBarHLayout::initUI() {
|
|||
|
||||
QPixmap pixmap(QIcon::fromTheme("system-search-symbolic").pixmap(QSize(20, 20)));
|
||||
m_queryIcon = new QLabel;
|
||||
m_queryIcon->setStyleSheet("background:transparent");
|
||||
m_queryIcon->setFixedSize(pixmap.size());
|
||||
m_queryIcon->setPixmap(pixmap);
|
||||
|
||||
m_queryText = new QLabel;
|
||||
m_queryText->setText(tr("Search"));
|
||||
m_queryText->setStyleSheet("background:transparent;color:#626c6e;");
|
||||
m_queryText->setEnabled(false);
|
||||
m_queryText->setContentsMargins(0, 0, 0, 4);
|
||||
m_queryText->adjustSize();
|
||||
|
||||
|
@ -196,7 +194,8 @@ bool SearchBarHLayout::eventFilter(QObject *watched, QEvent *event) {
|
|||
*/
|
||||
SearchLineEdit::SearchLineEdit() {
|
||||
this->setFocusPolicy(Qt::ClickFocus);
|
||||
this->installEventFilter(this);
|
||||
this->setAttribute(Qt::WA_InputMethodEnabled);
|
||||
// this->installEventFilter(this);
|
||||
// this->setContextMenuPolicy(Qt::NoContextMenu);
|
||||
this->setMaxLength(100);
|
||||
|
||||
|
@ -215,7 +214,6 @@ SearchLineEdit::SearchLineEdit() {
|
|||
// popView->setProperty("customShadowMargins", QVector4D(20, 20, 20, 20));
|
||||
// popView->setAttribute(Qt::WA_TranslucentBackground);
|
||||
// m_completer->setPopup(popView);
|
||||
// m_completer->popup()->setStyle(CustomStyle::getStyle());
|
||||
m_completer->setMaxVisibleItems(14);
|
||||
|
||||
setCompleter(m_completer);
|
||||
|
|
|
@ -174,6 +174,13 @@ int main(int argc, char *argv[]) {
|
|||
syslog(LOG_ERR, "Home is not exits!!\n");
|
||||
::sleep(1);
|
||||
}
|
||||
while(!QDir("/media/用户保险箱").exists()) {
|
||||
qWarning() << "集中管控目录未挂载!!";
|
||||
printf("集中管控目录未挂载!!");
|
||||
syslog(LOG_ERR, "集中管控目录未挂载!!\n");
|
||||
::sleep(1);
|
||||
}
|
||||
|
||||
|
||||
// Output log to file
|
||||
qInstallMessageHandler(messageOutput);
|
||||
|
|
|
@ -263,7 +263,7 @@ void MainWindow::initUi() {
|
|||
mainlayout->addWidget(m_titleFrame);
|
||||
mainlayout->addWidget(m_contentFrame);
|
||||
mainlayout->addWidget(m_searchWidget);
|
||||
connect(m_contentFrame, &ContentWidget::mousePressed, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord);
|
||||
connect(m_contentFrame, &ContentWidget::effectiveSearch, m_searchLayout, &SearchBarHLayout::effectiveSearchRecord);
|
||||
|
||||
connect(QApplication::primaryScreen(), &QScreen::geometryChanged,
|
||||
this, &MainWindow::monitorResolutionChange);
|
||||
|
@ -287,7 +287,7 @@ void MainWindow::initUi() {
|
|||
if(! m_search_result_thread->isRunning()) {
|
||||
m_search_result_thread->start();
|
||||
}
|
||||
searchContent(text);
|
||||
startSearch(text);
|
||||
// //允许弹窗且当前次搜索(为关闭主界面,算一次搜索过程)未询问且当前为暴力搜索
|
||||
// if(GlobalSettings::getInstance()->getValue(ENABLE_CREATE_INDEX_ASK_DIALOG).toString() != "false" && !m_currentSearchAsked && FileUtils::searchMethod == FileUtils::SearchMethod::DIRECTSEARCH)
|
||||
// m_askTimer->start();
|
||||
|
@ -389,10 +389,10 @@ void MainWindow::primaryScreenChangedSlot(QScreen *screen) {
|
|||
}
|
||||
|
||||
/**
|
||||
* @brief searchContent 搜索关键字
|
||||
* @param searchcontent
|
||||
* @brief startSearch 搜索关键字
|
||||
* @param keyword
|
||||
*/
|
||||
void MainWindow::searchContent(QString keyword) {
|
||||
void MainWindow::startSearch(QString keyword) {
|
||||
m_contentFrame->setKeyword(keyword);
|
||||
|
||||
//设置搜索
|
||||
|
|
|
@ -81,7 +81,7 @@ public:
|
|||
*/
|
||||
|
||||
// The parameter:keyword is the word or sentence which users want to search.
|
||||
void searchContent(QString keyword);
|
||||
void startSearch(QString keyword);
|
||||
|
||||
// The position which mainwindow shows follow the ukui-panel.
|
||||
void moveToPanel();
|
||||
|
|
|
@ -182,12 +182,6 @@ void SearchItemModel::setBestAppIcon(const QString &str, const bool & is_install
|
|||
}
|
||||
}
|
||||
|
||||
void SearchItemModel::refresh()
|
||||
{
|
||||
this->beginResetModel();
|
||||
this->endResetModel();
|
||||
}
|
||||
|
||||
int SearchItemModel::length()
|
||||
{
|
||||
return m_item->m_pathlist.length();
|
||||
|
|
|
@ -59,7 +59,6 @@ public:
|
|||
void removeItem(QString);
|
||||
void clear();
|
||||
void setBestAppIcon(const QString &, const bool &);
|
||||
void refresh();
|
||||
int length();
|
||||
|
||||
private :
|
||||
|
|
|
@ -5,21 +5,32 @@ size_t uniqueSymbol = 0;
|
|||
QMutex m_mutex;
|
||||
|
||||
SearchAppThread::SearchAppThread(QObject *parent) : QObject(parent) {
|
||||
m_pool.setMaxThreadCount(1);
|
||||
m_pool.setMaxThreadCount(2);
|
||||
m_pool.setExpiryTimeout(1000);
|
||||
}
|
||||
|
||||
void SearchAppThread::startSearch(const QString & keyword) {
|
||||
m_mutex.lock();
|
||||
++uniqueSymbol;
|
||||
m_mutex.unlock();
|
||||
SearchApp *appsearch;
|
||||
appsearch = new SearchApp(keyword);
|
||||
appsearch = new SearchApp(keyword,uniqueSymbol,this);
|
||||
// appsearch->setKeyword(keyword);
|
||||
connect(appsearch, &SearchApp::searchResultApp, this, &SearchAppThread::searchResultApp);
|
||||
// connect(appsearch, &SearchApp::searchResultApp, this, &SearchAppThread::searchResultApp);
|
||||
m_pool.start(appsearch);
|
||||
}
|
||||
|
||||
void SearchAppThread::sendResult(const QVector<QStringList> result)
|
||||
{
|
||||
Q_EMIT this->searchResultApp(result);
|
||||
}
|
||||
|
||||
SearchApp::SearchApp(const QString& keyword, QObject * parent) : QObject(parent) {
|
||||
|
||||
SearchApp::SearchApp(const QString& keyword, size_t uniqueSymbol, QObject * parent) : QObject(parent) {
|
||||
this->setAutoDelete(true);
|
||||
m_searchappThread = qobject_cast<SearchAppThread *>(parent);
|
||||
m_keyword = keyword;
|
||||
m_uniqueSymbol = uniqueSymbol;
|
||||
}
|
||||
|
||||
SearchApp::~SearchApp() {
|
||||
|
@ -35,11 +46,6 @@ SearchApp::~SearchApp() {
|
|||
//}
|
||||
|
||||
void SearchApp::run() {
|
||||
m_mutex.lock();
|
||||
size_t tmp_uniqueSymbol;
|
||||
uniqueSymbol++;
|
||||
tmp_uniqueSymbol = uniqueSymbol;
|
||||
m_mutex.unlock();
|
||||
//nameList:应用名,pathList:已安装的是.desktop路径,未安装为空,iconList:已安装的是图标名,未安装的是图标路径
|
||||
QStringList nameList, pathList, iconList, descList;
|
||||
QVector<QStringList> appVector;
|
||||
|
@ -70,8 +76,9 @@ void SearchApp::run() {
|
|||
appVector.append(iconList);
|
||||
appVector.append(descList);
|
||||
m_mutex.lock();
|
||||
if (tmp_uniqueSymbol == uniqueSymbol) {
|
||||
Q_EMIT this->searchResultApp(appVector);
|
||||
if (m_uniqueSymbol == uniqueSymbol) {
|
||||
QMetaObject::invokeMethod(m_searchappThread, "sendResult", Q_ARG(const QVector<QStringList>, appVector));
|
||||
// Q_EMIT this->searchResultApp(appVector);
|
||||
}
|
||||
m_mutex.unlock();
|
||||
m_installed_apps.clear();
|
||||
|
|
|
@ -13,27 +13,30 @@ public:
|
|||
SearchAppThread(QObject * parent = nullptr);
|
||||
~SearchAppThread() = default;
|
||||
void startSearch(const QString&);
|
||||
Q_INVOKABLE void sendResult(const QVector<QStringList> result);
|
||||
private:
|
||||
QThreadPool m_pool;
|
||||
Q_SIGNALS:
|
||||
void searchResultApp(const QVector<QStringList>&);
|
||||
void searchResultApp(const QVector<QStringList>);
|
||||
};
|
||||
|
||||
|
||||
class SearchApp : public QObject, public QRunnable {
|
||||
Q_OBJECT
|
||||
public:
|
||||
SearchApp(const QString& keyword, QObject * parent = nullptr);
|
||||
SearchApp(const QString& keyword, size_t uniqueSymbol, QObject * parent = nullptr);
|
||||
~SearchApp();
|
||||
// void setKeyword(const QString&);
|
||||
protected:
|
||||
void run() override;
|
||||
private:
|
||||
SearchAppThread *m_searchappThread = nullptr;
|
||||
QString m_keyword;
|
||||
size_t m_uniqueSymbol;
|
||||
QMap<NameString, QStringList> m_installed_apps;
|
||||
QMap<NameString, QStringList> m_uninstalled_apps;
|
||||
Q_SIGNALS:
|
||||
void searchResultApp(const QVector<QStringList>&);
|
||||
//Q_SIGNALS:
|
||||
// void searchResultApp(const QVector<QStringList>&);
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,8 @@ SettingsWidget::~SettingsWidget() {
|
|||
* @brief SettingsWidget::initUi 初始化界面UI
|
||||
*/
|
||||
void SettingsWidget::initUi() {
|
||||
QPalette pal = palette();
|
||||
pal.setColor(QPalette::Window, QColor(0, 0, 0, 0));
|
||||
// this->setFixedWidth(528);
|
||||
// this->setMinimumHeight(460);
|
||||
// this->setMaximumHeight(680);
|
||||
|
@ -81,8 +83,6 @@ void SettingsWidget::initUi() {
|
|||
m_closeBtn = new QPushButton(m_titleFrame);
|
||||
m_closeBtn->setFixedSize(24, 24);
|
||||
// m_closeBtn->setIcon(QIcon(":/res/icons/close.svg"));
|
||||
// m_closeBtn->setStyleSheet("QPushButton{background: transparent;}"
|
||||
// "QPushButton:hover:!pressed{background: transparent;}");
|
||||
m_closeBtn->setIcon(QIcon::fromTheme("window-close-symbolic"));
|
||||
m_closeBtn->setProperty("isWindowButton", 0x02);
|
||||
m_closeBtn->setProperty("useIconHighlightEffect", 0x08);
|
||||
|
@ -139,9 +139,9 @@ void SettingsWidget::initUi() {
|
|||
m_indexBtnLyt->addWidget(m_addDirBtn);
|
||||
m_indexBtnLyt->addStretch();
|
||||
m_dirListArea = new QScrollArea(m_contentFrame);
|
||||
m_dirListArea->setStyleSheet("QScrollArea{background:transparent;}");
|
||||
m_dirListArea->setPalette(pal);
|
||||
m_dirListArea->setFrameShape(QFrame::Shape::NoFrame);
|
||||
m_dirListWidget = new QWidget(m_contentFrame);
|
||||
m_dirListWidget->setStyleSheet("QWidget{background:transparent;}");
|
||||
m_dirListLyt = new QVBoxLayout(m_dirListWidget);
|
||||
m_dirListLyt->setContentsMargins(0, 0, 0, 0);
|
||||
m_dirListLyt->setSpacing(0);
|
||||
|
|
|
@ -28,7 +28,6 @@ include(singleapplication/qt-single-application.pri)
|
|||
SOURCES += \
|
||||
content-widget.cpp \
|
||||
create-index-ask-dialog.cpp \
|
||||
custom-style.cpp \
|
||||
input-box.cpp \
|
||||
main.cpp \
|
||||
mainwindow.cpp \
|
||||
|
@ -41,7 +40,6 @@ SOURCES += \
|
|||
HEADERS += \
|
||||
content-widget.h \
|
||||
create-index-ask-dialog.h \
|
||||
custom-style.h \
|
||||
input-box.h \
|
||||
mainwindow.h \
|
||||
search-app-thread.h \
|
||||
|
|
Loading…
Reference in New Issue