Merge branch '0520-dev' into 'main'

Merge from github

See merge request kylin-desktop/ukui-search!19
This commit is contained in:
PengfeiZhang 2021-05-31 11:18:02 +00:00
commit 09040366ea
167 changed files with 13962 additions and 13120 deletions

View File

@ -0,0 +1,39 @@
<schemalist gettext-domain="ukui-log4qt-ukui-search">
<schema id="org.ukui.ukui-log4qt-ukui-search" path="/org/ukui/ukui-log4qt-ukui-search/">
<key type="s" name="log4j-handleqtmessages">
<default>"true"</default>
<summary>hook qt messages</summary>
<description>Control if hook qt messages</description>
</key>
<key type="s" name="log4j-rootlogger">
<default>"WARN,console,daily"</default>
<summary>config rootLogger's level and appenders</summary>
<description>config rootLogger's level and appenders:"level,appender"</description>
</key>
<key type="s" name="log4j-appender-daily-datepattern">
<default>".yyyy-MM-dd"</default>
<summary>daily log file pattern</summary>
<description>set daily log file pattern format:one day</description>
</key>
<key type="s" name="log4j-appender-daily-layout-conversionpattern">
<default>"%d{yyyy-MM-dd HH:mm:ss,zzz}(%-4r)[%t]|%-5p| - %m%n"</default>
<summary>set log message's format</summary>
<description>set log message's format</description>
</key>
<key type="i" name="delaytime">
<default>3600</default>
<summary>set check log files delay time</summary>
<description>set check log files delay time</description>
</key>
<key type="i" name="maxfilecount">
<default>7</default>
<summary>set log files count</summary>
<description>set the maximum number of log files to keep</description>
</key>
<key type="i" name="maxfilesize">
<default>512</default>
<summary>set log files total size</summary>
<description>set log files total size, unit M</description>
</key>
</schema>
</schemalist>

View File

@ -0,0 +1,14 @@
<schemalist gettext-domain="ukui-search">
<schema id="org.ukui.search.settings" path="/org/ukui/ukui-search/settings/">
<key name="index-search" type="b">
<default>false</default>
<summary>search method</summary>
<description>Whether the current search method is index search.</description>
</key>
<key name="web-engine" type="s">
<default>"baidu"</default>
<summary>web engine</summary>
<description>Web engine to search keyword online.</description>
</key>
</schema>
</schemalist>

58
debian/changelog vendored
View File

@ -1,3 +1,61 @@
ukui-search (0.4.0+0520) v101; urgency=medium
* Bug 55034,55545,55326,55496
* 任务29459
* 其他改动:
* Fix: Selection is cleared when palette changed.
- 修复收到palettechanged信号时列表选中状态消失的问题。
* Fix:Icon in detail view will not refresh when icon-theme changed.
- 修复主题图标改变时详情页图标未更新的问题。
* Fix:Index process crashed when parsing some wps templates.
- 修复了解析某些wps模板文件时索引崩溃的问题。
* Fix:Block list conf won't work.
- 修复了在控制面板设置黑名单不能及时生效的问题。
* Fix: Automatic completion won't work in the next search after clicking actions in the detail widget.
- 修复了点击详情页不会保存搜索历史记录的bug。
-- zhangpengfei <zhangpengfei@kylinos.cn> Thu, 20 May 2021 09:08:15 +0800
ukui-search (0.4.0+0508) v101; urgency=medium
* Bug 49153.
* 任务
* 其他改动:
* Android app can be search now.
- 新增搜索安卓兼容目录下应用功能。
* Start ukui-control-center on settings button clicked.
- 点击设置按钮,跳转到控制面板。
* Fix(frontend): Search list & dialog will not refresh when fontsize changed.
- 修复了当弹出创建索引提示框时修改系统字体大小会出现显示错乱的bug。
* Fix: Blacklist does not take effect when search method is direct search.
-修复了当选择搜索方式为不创建索引搜索时黑名单无效的问题。
* Discard ukui-log4qt for log printing.
-由于日志模块偶现卡死bug移除了ukui-log4qt日志打印功能。
-- zhangpengfei <zhangpengfei@kylinos.cn> Sat, 08 May 2021 15:35:06 +0800
ukui-search (0.4.0+0422-1) v101; urgency=medium
* Bug 45037,45035,47971,26454.
* 任务 25405.
* 其他改动:
* Add support for 'pptx','xlsx','pdf'.
- 增加了对'pptx','xlsx','pdf'格式文本内容解析的支持。
* Fix(frontend): Width of scrollArea is not enough which caused some text displayed incompletely.
- 修复部分区域预留宽度不够导致控件显示不全
- 和出现不必要滚动条的问题(45037,45035)
* Feature(frontend):Add double click event to search list view.
- 新增搜索结果列表双击打开功能。
* Fix: Mainwindow sometimes does not refresh when the theme changes.
- 修复修改完透明度后修改主题,主界面颜色未随主题切换的问题。
* Fix: Font-size & row height will not refresh in search list.
- 修复字体大小改变时搜索结果列表行高和fontmetrics不刷新的bug(47971,26454)。
* Add search method changed function --index search and bfs search.
- 增加搜索方式切换功能,可以在遍历搜索和索引搜索之间切换。
* Use ukui-log4qt for log printing.
- 使用ukui-log4qt打印日志(25405)。
-- zhangpengfei <zhangpengfei@kylinos.cn> Wed, 21 Apr 2021 09:36:39 +0800
ukui-search (0.3.0+0406) v101; urgency=medium
* Add support for '.xls', '.dot','.wps', '.pps', '.dps', '.et','.ppt'.

3
debian/control vendored
View File

@ -17,7 +17,8 @@ Build-Depends: debhelper (>=9.0.0),
libgsettings-qt-dev,
libqt5x11extras5-dev,
libuchardet-dev,
libpoppler-qt5-dev
libpoppler-qt5-dev,
libukui-log4qt-dev
Standards-Version: 4.5.0
Homepage: https://www.ukui.org/
Vcs-Git: https://github.com/ukui/ukui-search.git

View File

@ -0,0 +1,2 @@
usr/include/chinese-seg/*
usr/lib/*/libchinese-segmentation.so

View File

@ -1,2 +1,2 @@
usr/lib/*.so.*
usr/lib/*/libchinese-segmentation.so.*
/usr/share/ukui-search/res/dict/*.utf8

View File

@ -1,2 +1,2 @@
usr/include/ukui-search/*
usr/lib/*/*.so
usr/lib/*/libukui-search.so

View File

@ -1 +1 @@
usr/lib/*/*.so.*
usr/lib/*/libukui-search.so.*

View File

@ -2,3 +2,4 @@ usr/bin/ukui-search
etc/xdg/autostart/*.desktop
usr/share/applications/*.desktop
src/.qm/*.qm usr/share/ukui-search/translations
usr/share/glib-2.0/schemas/*.xml

View File

@ -24,11 +24,10 @@
static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
QMutex ChineseSegmentation::m_mutex;
ChineseSegmentation::ChineseSegmentation()
{
ChineseSegmentation::ChineseSegmentation() {
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
const char * const USER_DICT_PATH ="/usr/share/ukui-search/res/dict/user.dict.utf8";
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
@ -39,27 +38,24 @@ ChineseSegmentation::ChineseSegmentation()
STOP_WORD_PATH);
}
ChineseSegmentation::~ChineseSegmentation()
{
ChineseSegmentation::~ChineseSegmentation() {
if(m_jieba)
delete m_jieba;
m_jieba = nullptr;
}
ChineseSegmentation *ChineseSegmentation::getInstance()
{
ChineseSegmentation *ChineseSegmentation::getInstance() {
QMutexLocker locker(&m_mutex);
if (!global_instance_chinese_segmentation) {
if(!global_instance_chinese_segmentation) {
global_instance_chinese_segmentation = new ChineseSegmentation;
}
return global_instance_chinese_segmentation;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
{
std::string s;
s=str.toStdString();
str.squeeze();
QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
// std::string s;
// s = str.toStdString();
// str.squeeze();
const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
@ -69,16 +65,15 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString str)
convert(keywordres, vecNeeds);
keywordres.clear();
keywordres.shrink_to_fit();
// keywordres.shrink_to_fit();
return vecNeeds;
}
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw)
{
for (auto i : keywordres){
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw) {
for(auto i : keywordres) {
SKeyWord temp;
temp.word = i.word;
temp.offsets = QVector<size_t>::fromStdVector(i.offsets);

View File

@ -32,24 +32,23 @@
#include <QDebug>
#include <QMutex>
struct SKeyWord{
struct SKeyWord {
std::string word;
QVector<size_t> offsets;
double weight;
~SKeyWord(){
~SKeyWord() {
word = std::move("");
offsets.clear();
offsets.shrink_to_fit();
}
};
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
{
class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
public:
static ChineseSegmentation *getInstance();
~ChineseSegmentation();
QVector<SKeyWord> callSegement(QString str);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
QVector<SKeyWord> callSegement(std::string s);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
private:
static QMutex m_mutex;
cppjieba::Jieba *m_jieba;

View File

@ -43,252 +43,247 @@ const size_t DICT_COLUMN_NUM = 3;
const char* const UNKNOWN_TAG = "";
class DictTrie {
public:
enum UserWordWeightOption {
WordWeightMin,
WordWeightMedian,
WordWeightMax,
}; // enum UserWordWeightOption
public:
enum UserWordWeightOption {
WordWeightMin,
WordWeightMedian,
WordWeightMax,
}; // enum UserWordWeightOption
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, user_word_weight_opt);
}
~DictTrie() {
delete trie_;
}
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false;
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, user_word_weight_opt);
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
if (!MakeNodeInfo(node_info, word, weight , tag)) {
return false;
~DictTrie() {
delete trie_;
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end);
}
void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const {
trie_->Find(begin, end, res, max_word_len);
}
bool Find(const string& word)
{
const DictUnit *tmp = NULL;
RuneStrArray runes;
if (!DecodeRunesInString(word, runes))
{
XLOG(ERROR) << "Decode failed.";
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
if(!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false;
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
tmp = Find(runes.begin(), runes.end());
if (tmp == NULL)
{
return false;
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
if(!MakeNodeInfo(node_info, word, weight, tag)) {
return false;
}
active_node_infos_.push_back(node_info);
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
else
{
return true;
// Looks up the rune range [begin, end) by delegating to the underlying trie
// and returns whatever trie_->Find yields for that range.
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end);
}
}
bool IsUserDictSingleChineseWord(const Rune& word) const {
return IsIn(user_dict_single_chinese_word_, word);
}
// Forwards the rune range [begin, end), the output vector `res` and the
// `max_word_len` limit (MAX_WORD_LENGTH by default) to the trie's Find
// overload that fills a vector of Dag entries.
void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const {
trie_->Find(begin, end, res, max_word_len);
}
double GetMinWeight() const {
return min_weight_;
}
// Reports whether `word` is present in the dictionary trie.
//
// The string is first decoded into runes. If decoding fails the error is
// logged and the lookup is reported as a miss right away, instead of querying
// the trie with whatever a failed decode left behind in `runes`.
//
// Returns true only when the rune-range lookup yields a non-NULL DictUnit.
bool Find(const string& word) {
    RuneStrArray runes;
    if(!DecodeRunesInString(word, runes)) {
        XLOG(ERROR) << "Decode failed.";
        return false;
    }
    return Find(runes.begin(), runes.end()) != NULL;
}
void InserUserDictNode(const string& line) {
vector<string> buf;
DictUnit node_info;
Split(line, buf, " ");
if(buf.size() == 1){
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
UNKNOWN_TAG);
} else if (buf.size() == 2) {
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
buf[1]);
} else if (buf.size() == 3) {
int freq = atoi(buf[1].c_str());
assert(freq_sum_ > 0.0);
double weight = log(1.0 * freq / freq_sum_);
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
// Checks, via IsIn, whether `word` is a member of
// user_dict_single_chinese_word_.
bool IsUserDictSingleChineseWord(const Rune& word) const {
return IsIn(user_dict_single_chinese_word_, word);
}
// Returns the stored min_weight_ (assigned in SetStaticWordWeights from the
// lowest-weighted entry of the static dictionary).
double GetMinWeight() const {
return min_weight_;
}
void InserUserDictNode(const string& line) {
vector<string> buf;
DictUnit node_info;
Split(line, buf, " ");
if(buf.size() == 1) {
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
UNKNOWN_TAG);
} else if(buf.size() == 2) {
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
buf[1]);
} else if(buf.size() == 3) {
int freq = atoi(buf[1].c_str());
assert(freq_sum_ > 0.0);
double weight = log(1.0 * freq / freq_sum_);
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
}
static_node_infos_.push_back(node_info);
if (node_info.word.size() == 1) {
user_dict_single_chinese_word_.insert(node_info.word[0]);
if(node_info.word.size() == 1) {
user_dict_single_chinese_word_.insert(node_info.word[0]);
}
}
void LoadUserDict(const vector<string>& buf) {
for (size_t i = 0; i < buf.size(); i++) {
InserUserDictNode(buf[i]);
}
}
void LoadUserDict(const set<string>& buf) {
std::set<string>::const_iterator iter;
for (iter = buf.begin(); iter != buf.end(); iter++){
InserUserDictNode(*iter);
}
}
void LoadUserDict(const string& filePaths) {
vector<string> files = limonp::Split(filePaths, "|;");
size_t lineno = 0;
for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line;
for (; getline(ifs, line); lineno++) {
if (line.size() == 0) {
continue;
void LoadUserDict(const vector<string>& buf) {
for(size_t i = 0; i < buf.size(); i++) {
InserUserDictNode(buf[i]);
}
InserUserDictNode(line);
}
}
}
private:
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
LoadDict(dict_path);
freq_sum_ = CalcFreqSum(static_node_infos_);
CalculateWeight(static_node_infos_, freq_sum_);
SetStaticWordWeights(user_word_weight_opt);
if (user_dict_paths.size()) {
LoadUserDict(user_dict_paths);
}
Shrink(static_node_infos_);
CreateTrie(static_node_infos_);
}
void CreateTrie(const vector<DictUnit>& dictUnits) {
assert(dictUnits.size());
vector<Unicode> words;
vector<const DictUnit*> valuePointers;
for (size_t i = 0 ; i < dictUnits.size(); i ++) {
words.push_back(dictUnits[i].word);
valuePointers.push_back(&dictUnits[i]);
}
trie_ = new Trie(words, valuePointers);
}
bool MakeNodeInfo(DictUnit& node_info,
const string& word,
double weight,
const string& tag) {
if (!DecodeRunesInString(word, node_info.word)) {
XLOG(ERROR) << "Decode " << word << " failed.";
return false;
void LoadUserDict(const set<string>& buf) {
std::set<string>::const_iterator iter;
for(iter = buf.begin(); iter != buf.end(); iter++) {
InserUserDictNode(*iter);
}
}
node_info.weight = weight;
node_info.tag = tag;
return true;
}
void LoadDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line;
vector<string> buf;
void LoadUserDict(const string& filePaths) {
vector<string> files = limonp::Split(filePaths, "|;");
size_t lineno = 0;
for(size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line;
DictUnit node_info;
for (size_t lineno = 0; getline(ifs, line); lineno++) {
Split(line, buf, " ");
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
MakeNodeInfo(node_info,
buf[0],
atof(buf[1].c_str()),
buf[2]);
static_node_infos_.push_back(node_info);
for(; getline(ifs, line); lineno++) {
if(line.size() == 0) {
continue;
}
InserUserDictNode(line);
}
}
}
}
static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
return lhs.weight < rhs.weight;
}
void SetStaticWordWeights(UserWordWeightOption option) {
XCHECK(!static_node_infos_.empty());
vector<DictUnit> x = static_node_infos_;
sort(x.begin(), x.end(), WeightCompare);
min_weight_ = x[0].weight;
max_weight_ = x[x.size() - 1].weight;
median_weight_ = x[x.size() / 2].weight;
switch (option) {
case WordWeightMin:
user_word_default_weight_ = min_weight_;
break;
case WordWeightMedian:
user_word_default_weight_ = median_weight_;
break;
default:
user_word_default_weight_ = max_weight_;
break;
private:
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
LoadDict(dict_path);
freq_sum_ = CalcFreqSum(static_node_infos_);
CalculateWeight(static_node_infos_, freq_sum_);
SetStaticWordWeights(user_word_weight_opt);
if(user_dict_paths.size()) {
LoadUserDict(user_dict_paths);
}
Shrink(static_node_infos_);
CreateTrie(static_node_infos_);
}
}
double CalcFreqSum(const vector<DictUnit>& node_infos) const {
double sum = 0.0;
for (size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].weight;
void CreateTrie(const vector<DictUnit>& dictUnits) {
assert(dictUnits.size());
vector<Unicode> words;
vector<const DictUnit*> valuePointers;
for(size_t i = 0 ; i < dictUnits.size(); i ++) {
words.push_back(dictUnits[i].word);
valuePointers.push_back(&dictUnits[i]);
}
trie_ = new Trie(words, valuePointers);
}
return sum;
}
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
assert(sum > 0.0);
for (size_t i = 0; i < node_infos.size(); i++) {
DictUnit& node_info = node_infos[i];
assert(node_info.weight > 0.0);
node_info.weight = log(double(node_info.weight)/sum);
// Fills `node_info` from a dictionary entry.
// `word` is decoded into node_info.word; when that succeeds the weight and tag
// are stored and true is returned. When decoding fails an error is logged,
// weight and tag are left untouched, and false is returned.
bool MakeNodeInfo(DictUnit& node_info,
                  const string& word,
                  double weight,
                  const string& tag) {
    const bool decoded = DecodeRunesInString(word, node_info.word);
    if(decoded) {
        node_info.weight = weight;
        node_info.tag = tag;
        return true;
    }
    XLOG(ERROR) << "Decode " << word << " failed.";
    return false;
}
}
void Shrink(vector<DictUnit>& units) const {
vector<DictUnit>(units.begin(), units.end()).swap(units);
}
void LoadDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line;
vector<string> buf;
vector<DictUnit> static_node_infos_;
deque<DictUnit> active_node_infos_; // must not be vector
Trie * trie_;
DictUnit node_info;
for(size_t lineno = 0; getline(ifs, line); lineno++) {
Split(line, buf, " ");
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
MakeNodeInfo(node_info,
buf[0],
atof(buf[1].c_str()),
buf[2]);
static_node_infos_.push_back(node_info);
}
}
double freq_sum_;
double min_weight_;
double max_weight_;
double median_weight_;
double user_word_default_weight_;
unordered_set<Rune> user_dict_single_chinese_word_;
// Ordering predicate for sort(): true when lhs has the strictly smaller weight,
// which yields ascending order by weight.
static bool WeightCompare(const DictUnit& lhs, const DictUnit& rhs) {
return lhs.weight < rhs.weight;
}
// Derives min_weight_, max_weight_ and median_weight_ from a weight-sorted copy
// of static_node_infos_ and picks the default weight for user words according
// to `option` (any value other than WordWeightMin / WordWeightMedian selects
// the maximum).
void SetStaticWordWeights(UserWordWeightOption option) {
    XCHECK(!static_node_infos_.empty());

    // Sort a copy so the order of the stored dictionary is left as loaded.
    vector<DictUnit> sorted_units(static_node_infos_);
    sort(sorted_units.begin(), sorted_units.end(), WeightCompare);

    min_weight_ = sorted_units.front().weight;
    max_weight_ = sorted_units.back().weight;
    median_weight_ = sorted_units[sorted_units.size() / 2].weight;

    if(option == WordWeightMin) {
        user_word_default_weight_ = min_weight_;
    } else if(option == WordWeightMedian) {
        user_word_default_weight_ = median_weight_;
    } else {
        user_word_default_weight_ = max_weight_;
    }
}
double CalcFreqSum(const vector<DictUnit>& node_infos) const {
double sum = 0.0;
for(size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].weight;
}
return sum;
}
// Replaces every entry's weight w with log(w / sum), in place.
// Both `sum` and each incoming weight are asserted to be strictly positive.
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const {
    assert(sum > 0.0);
    for(vector<DictUnit>::iterator unit = node_infos.begin(); unit != node_infos.end(); ++unit) {
        assert(unit->weight > 0.0);
        unit->weight = log(double(unit->weight) / sum);
    }
}
// Builds a temporary vector from the contents of `units` and swaps it into
// `units`; the elements are unchanged, only the underlying storage is
// replaced (presumably to drop spare capacity left over from loading).
void Shrink(vector<DictUnit>& units) const {
vector<DictUnit>(units.begin(), units.end()).swap(units);
}
vector<DictUnit> static_node_infos_;
deque<DictUnit> active_node_infos_; // must not be vector
Trie * trie_;
double freq_sum_;
double min_weight_;
double max_weight_;
double median_weight_;
double user_word_default_weight_;
unordered_set<Rune> user_dict_single_chinese_word_;
};
}

View File

@ -29,82 +29,82 @@
namespace cppjieba {
class FullSegment: public SegmentBase {
public:
FullSegment(const string& dictPath) {
dictTrie_ = new DictTrie(dictPath);
isNeedDestroy_ = true;
}
FullSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) {
assert(dictTrie_);
}
~FullSegment() {
if (isNeedDestroy_) {
delete dictTrie_;
public:
// Creates a segmenter that owns its dictionary: a new DictTrie is built from
// `dictPath` and isNeedDestroy_ is set so the destructor deletes it.
FullSegment(const string& dictPath) {
dictTrie_ = new DictTrie(dictPath);
isNeedDestroy_ = true;
}
}
void Cut(const string& sentence,
vector<string>& words) const {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
// Creates a segmenter on top of an existing dictionary. The pointer must be
// non-null (asserted); isNeedDestroy_ is false, so the destructor leaves the
// dictionary alone.
FullSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) {
assert(dictTrie_);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& res) const {
// result of searching in trie tree
LocalVector<pair<size_t, const DictUnit*> > tRes;
// max index of res's words
size_t maxIdx = 0;
// always equals to (uItr - begin)
size_t uIdx = 0;
// tmp variables
size_t wordLen = 0;
assert(dictTrie_);
vector<struct Dag> dags;
dictTrie_->Find(begin, end, dags);
for (size_t i = 0; i < dags.size(); i++) {
for (size_t j = 0; j < dags[i].nexts.size(); j++) {
size_t nextoffset = dags[i].nexts[j].first;
assert(nextoffset < dags.size());
const DictUnit* du = dags[i].nexts[j].second;
if (du == NULL) {
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
} else {
wordLen = du->word.size();
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
~FullSegment() {
if(isNeedDestroy_) {
delete dictTrie_;
}
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
}
uIdx++;
}
}
private:
const DictTrie* dictTrie_;
bool isNeedDestroy_;
// Convenience overload: cuts `sentence` into Word objects with the
// vector<Word> overload, then converts them to plain strings in `words`
// through GetStringsFromWords.
void Cut(const string& sentence,
vector<string>& words) const {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
// Cuts the rune range [begin, end): asks the dictionary for the DAG of
// candidate words starting at each position and appends the selected
// WordRange entries to `res` (existing content of `res` is kept).
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& res) const {
// result of searching in trie tree
// NOTE(review): tRes is declared but not used anywhere below — confirm it can be removed.
LocalVector<pair<size_t, const DictUnit*> > tRes;
// max index of res's words
size_t maxIdx = 0;
// always equals to (uItr - begin)
size_t uIdx = 0;
// tmp variables
size_t wordLen = 0;
assert(dictTrie_);
vector<struct Dag> dags;
dictTrie_->Find(begin, end, dags);
for(size_t i = 0; i < dags.size(); i++) {
for(size_t j = 0; j < dags[i].nexts.size(); j++) {
size_t nextoffset = dags[i].nexts[j].first;
assert(nextoffset < dags.size());
const DictUnit* du = dags[i].nexts[j].second;
if(du == NULL) {
// No dictionary entry for this candidate: emit it only when it is the sole
// candidate at this position and maxIdx has not moved past uIdx.
if(dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
} else {
wordLen = du->word.size();
// Dictionary words of two or more runes are always emitted; shorter ones
// only under the same "sole candidate, not yet covered" condition as above.
if(wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr);
}
}
// wordLen is not reset in the du == NULL branch, so this uses the length of
// the most recent dictionary hit.
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
}
uIdx++;
}
}
private:
const DictTrie* dictTrie_;
bool isNeedDestroy_;
};
}

View File

@ -28,118 +28,118 @@ using namespace limonp;
typedef unordered_map<Rune, double> EmitProbMap;
struct HMMModel {
/*
* STATUS:
* 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
* */
enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
/*
* STATUS:
* 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
* */
enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
HMMModel(const string& modelPath) {
memset(startProb, 0, sizeof(startProb));
memset(transProb, 0, sizeof(transProb));
statMap[0] = 'B';
statMap[1] = 'E';
statMap[2] = 'M';
statMap[3] = 'S';
emitProbVec.push_back(&emitProbB);
emitProbVec.push_back(&emitProbE);
emitProbVec.push_back(&emitProbM);
emitProbVec.push_back(&emitProbS);
LoadModel(modelPath);
}
~HMMModel() {
}
void LoadModel(const string& filePath) {
ifstream ifile(filePath.c_str());
XCHECK(ifile.is_open()) << "open " << filePath << " failed";
string line;
vector<string> tmp;
vector<string> tmp2;
//Load startProb
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for (size_t j = 0; j< tmp.size(); j++) {
startProb[j] = atof(tmp[j].c_str());
// Zeroes the start and transition tables, fills statMap with the state
// letters 'B', 'E', 'M', 'S' in index order, registers the four emission maps
// in emitProbVec in that same order, and finally loads the model from
// `modelPath`.
HMMModel(const string& modelPath) {
memset(startProb, 0, sizeof(startProb));
memset(transProb, 0, sizeof(transProb));
statMap[0] = 'B';
statMap[1] = 'E';
statMap[2] = 'M';
statMap[3] = 'S';
emitProbVec.push_back(&emitProbB);
emitProbVec.push_back(&emitProbE);
emitProbVec.push_back(&emitProbM);
emitProbVec.push_back(&emitProbS);
LoadModel(modelPath);
}
//Load transProb
for (size_t i = 0; i < STATUS_SUM; i++) {
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for (size_t j =0; j < STATUS_SUM; j++) {
transProb[i][j] = atof(tmp[j].c_str());
}
~HMMModel() {
}
void LoadModel(const string& filePath) {
ifstream ifile(filePath.c_str());
XCHECK(ifile.is_open()) << "open " << filePath << " failed";
string line;
vector<string> tmp;
vector<string> tmp2;
//Load startProb
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for(size_t j = 0; j < tmp.size(); j++) {
startProb[j] = atof(tmp[j].c_str());
}
//Load emitProbB
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbB));
//Load transProb
for(size_t i = 0; i < STATUS_SUM; i++) {
XCHECK(GetLine(ifile, line));
Split(line, tmp, " ");
XCHECK(tmp.size() == STATUS_SUM);
for(size_t j = 0; j < STATUS_SUM; j++) {
transProb[i][j] = atof(tmp[j].c_str());
}
}
//Load emitProbE
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbE));
//Load emitProbB
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbB));
//Load emitProbM
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbM));
//Load emitProbE
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbE));
//Load emitProbS
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbS));
}
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
double defVal)const {
EmitProbMap::const_iterator cit = ptMp->find(key);
if (cit == ptMp->end()) {
return defVal;
//Load emitProbM
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbM));
//Load emitProbS
XCHECK(GetLine(ifile, line));
XCHECK(LoadEmitProb(line, emitProbS));
}
return cit->second;
}
bool GetLine(ifstream& ifile, string& line) {
while (getline(ifile, line)) {
Trim(line);
if (line.empty()) {
continue;
}
if (StartsWith(line, "#")) {
continue;
}
return true;
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
double defVal)const {
EmitProbMap::const_iterator cit = ptMp->find(key);
if(cit == ptMp->end()) {
return defVal;
}
return cit->second;
}
return false;
}
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
if (line.empty()) {
return false;
}
vector<string> tmp, tmp2;
Unicode unicode;
Split(line, tmp, ",");
for (size_t i = 0; i < tmp.size(); i++) {
Split(tmp[i], tmp2, ":");
if (2 != tmp2.size()) {
XLOG(ERROR) << "emitProb illegal.";
// Reads lines from `ifile` until one remains that, after Trim, is neither
// empty nor starts with "#". That trimmed line is left in `line` and true is
// returned; false is returned once the stream has no more lines.
bool GetLine(ifstream& ifile, string& line) {
    while(getline(ifile, line)) {
        Trim(line);
        const bool skippable = line.empty() || StartsWith(line, "#");
        if(!skippable) {
            return true;
        }
    }
    return false;
}
if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
XLOG(ERROR) << "TransCode failed.";
return false;
}
mp[unicode[0]] = atof(tmp2[1].c_str());
}
return true;
}
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
if(line.empty()) {
return false;
}
vector<string> tmp, tmp2;
Unicode unicode;
Split(line, tmp, ",");
for(size_t i = 0; i < tmp.size(); i++) {
Split(tmp[i], tmp2, ":");
if(2 != tmp2.size()) {
XLOG(ERROR) << "emitProb illegal.";
return false;
}
if(!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
XLOG(ERROR) << "TransCode failed.";
return false;
}
mp[unicode[0]] = atof(tmp2[1].c_str());
}
return true;
}
char statMap[STATUS_SUM];
double startProb[STATUS_SUM];
double transProb[STATUS_SUM][STATUS_SUM];
EmitProbMap emitProbB;
EmitProbMap emitProbE;
EmitProbMap emitProbM;
EmitProbMap emitProbS;
vector<EmitProbMap* > emitProbVec;
char statMap[STATUS_SUM];
double startProb[STATUS_SUM];
double transProb[STATUS_SUM][STATUS_SUM];
EmitProbMap emitProbB;
EmitProbMap emitProbE;
EmitProbMap emitProbM;
EmitProbMap emitProbS;
vector<EmitProbMap* > emitProbVec;
}; // struct HMMModel
} // namespace cppjieba

View File

@ -28,179 +28,179 @@
namespace cppjieba {
class HMMSegment: public SegmentBase {
public:
HMMSegment(const string& filePath)
: model_(new HMMModel(filePath)), isNeedDestroy_(true) {
}
HMMSegment(const HMMModel* model)
: model_(model), isNeedDestroy_(false) {
}
~HMMSegment() {
if (isNeedDestroy_) {
delete model_;
public:
// Creates a segmenter that owns its model: a new HMMModel is loaded from
// `filePath` and isNeedDestroy_ is set so the destructor deletes it.
HMMSegment(const string& filePath)
: model_(new HMMModel(filePath)), isNeedDestroy_(true) {
}
}
void Cut(const string& sentence,
vector<string>& words) const {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
// Creates a segmenter on top of an existing model; isNeedDestroy_ is false,
// so the destructor leaves the model alone.
HMMSegment(const HMMModel* model)
: model_(model), isNeedDestroy_(false) {
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right = begin;
while (right != end) {
if (right->rune < 0x80) {
if (left != right) {
InternalCut(left, right, res);
~HMMSegment() {
if(isNeedDestroy_) {
delete model_;
}
left = right;
do {
right = SequentialLetterRule(left, end);
if (right != left) {
break;
}
right = NumbersRule(left, end);
if (right != left) {
break;
}
right ++;
} while (false);
WordRange wr(left, right - 1);
res.push_back(wr);
left = right;
} else {
right++;
}
}
if (left != right) {
InternalCut(left, right, res);
}
}
private:
// sequential letters rule
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++;
} else {
return begin;
}
while (begin != end) {
x = begin->rune;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
begin ++;
} else {
break;
}
}
return begin;
}
//
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune;
if ('0' <= x && x <= '9') {
begin ++;
} else {
return begin;
}
while (begin != end) {
x = begin->rune;
if ( ('0' <= x && x <= '9') || x == '.') {
begin++;
} else {
break;
}
}
return begin;
}
// Segments [begin, end) using the state sequence produced by Viterbi and
// appends one WordRange per word to `res` (existing entries are kept).
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
vector<size_t> status;
Viterbi(begin, end, status);
// `left` marks the first rune of the word currently being assembled.
RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right;
for (size_t i = 0; i < status.size(); i++) {
// An odd state id closes the current word at position i.
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
right = begin + i + 1;
// WordRange is built from the first and the last rune of the word, hence `right - 1`.
WordRange wr(left, right - 1);
res.push_back(wr);
left = right;
}
}
}
void Viterbi(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<size_t>& status) const {
size_t Y = HMMModel::STATUS_SUM;
size_t X = end - begin;
size_t XYSize = X * Y;
size_t now, old, stat;
double tmp, endE, endS;
vector<int> path(XYSize);
vector<double> weight(XYSize);
//start
for (size_t y = 0; y < Y; y++) {
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
path[0 + y * X] = -1;
}
double emitProb;
for (size_t x = 1; x < X; x++) {
for (size_t y = 0; y < Y; y++) {
now = x + y*X;
weight[now] = MIN_DOUBLE;
path[now] = HMMModel::E; // warning
emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin+x)->rune, MIN_DOUBLE);
for (size_t preY = 0; preY < Y; preY++) {
old = x - 1 + preY * X;
tmp = weight[old] + model_->transProb[preY][y] + emitProb;
if (tmp > weight[now]) {
weight[now] = tmp;
path[now] = preY;
}
void Cut(const string& sentence,
vector<string>& words) const {
vector<Word> tmp;
Cut(sentence, tmp);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
// Segments the rune range [begin, end) and appends the resulting WordRanges to `res`.
// Runes below 0x80 are handled by the letter/number rules; every other stretch
// of runes is passed to InternalCut.
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
// [left, right) is the pending stretch that has not been emitted yet.
RuneStrArray::const_iterator left = begin;
RuneStrArray::const_iterator right = begin;
while(right != end) {
if(right->rune < 0x80) {
// Flush the pending stretch collected before this rune.
if(left != right) {
InternalCut(left, right, res);
}
left = right;
// Single-pass do/while used as a breakable block: the first rule that
// consumes at least one rune wins; otherwise exactly one rune is taken.
do {
right = SequentialLetterRule(left, end);
if(right != left) {
break;
}
right = NumbersRule(left, end);
if(right != left) {
break;
}
right ++;
} while(false);
// WordRange is built from the first and the last rune, hence `right - 1`.
WordRange wr(left, right - 1);
res.push_back(wr);
left = right;
} else {
right++;
}
}
// Flush whatever is still pending at the end of the input.
if(left != right) {
InternalCut(left, right, res);
}
}
private:
// sequential letters rule
RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    // Matches an ASCII letter followed by any number of ASCII letters/digits.
    // Returns one past the last matched rune, or `begin` when nothing matches.
    if(begin == end) {
        // Empty range: there is no rune to inspect, so never dereference `begin`.
        return begin;
    }
    Rune x = begin->rune;
    if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
        begin ++;
    } else {
        return begin;
    }
    while(begin != end) {
        x = begin->rune;
        if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
            begin ++;
        } else {
            break;
        }
    }
    return begin;
}
//
RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
    // Matches an ASCII digit followed by any number of ASCII digits or '.'.
    // Returns one past the last matched rune, or `begin` when nothing matches.
    if(begin == end) {
        // Empty range: there is no rune to inspect, so never dereference `begin`.
        return begin;
    }
    Rune x = begin->rune;
    if('0' <= x && x <= '9') {
        begin ++;
    } else {
        return begin;
    }
    while(begin != end) {
        x = begin->rune;
        if(('0' <= x && x <= '9') || x == '.') {
            begin++;
        } else {
            break;
        }
    }
    return begin;
}
void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
    // Decode one state id per rune, then split after every word-closing state.
    vector<size_t> states;
    Viterbi(begin, end, states);

    RuneStrArray::const_iterator wordStart = begin;
    for(size_t pos = 0; pos < states.size(); ++pos) {
        // Even state ids do not close a word; only odd ones do (the original
        // note equates those with HMMModel::E and HMMModel::S).
        if(states[pos] % 2 == 0) {
            continue;
        }
        const RuneStrArray::const_iterator wordEnd = begin + pos + 1;
        // WordRange takes the first and the last rune of the word.
        res.push_back(WordRange(wordStart, wordEnd - 1));
        wordStart = wordEnd;
    }
}
}
endE = weight[X-1+HMMModel::E*X];
endS = weight[X-1+HMMModel::S*X];
stat = 0;
if (endE >= endS) {
stat = HMMModel::E;
} else {
stat = HMMModel::S;
// Computes the best-scoring state sequence for the runes in [begin, end).
// On return `status` holds exactly one state id per rune (empty for an empty
// range). Scores are combined by addition, using model_->startProb,
// model_->transProb and model_->GetEmitProb.
void Viterbi(RuneStrArray::const_iterator begin,
             RuneStrArray::const_iterator end,
             vector<size_t>& status) const {
    const size_t Y = HMMModel::STATUS_SUM;
    const size_t X = end - begin;
    if(X == 0) {
        // Nothing to decode; avoid dereferencing `begin` and indexing empty tables.
        status.clear();
        return;
    }
    // Both tables are laid out state-major: cell (x, y) lives at x + y * X.
    vector<int> path(X * Y);
    vector<double> weight(X * Y);

    // First column: start score plus emission of the first rune; no predecessor.
    for(size_t y = 0; y < Y; y++) {
        weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], begin->rune, MIN_DOUBLE);
        path[0 + y * X] = -1;
    }

    // Remaining columns: keep the best predecessor state for every (x, y).
    for(size_t x = 1; x < X; x++) {
        for(size_t y = 0; y < Y; y++) {
            const size_t now = x + y * X;
            weight[now] = MIN_DOUBLE;
            path[now] = HMMModel::E; // default predecessor when no candidate beats MIN_DOUBLE
            const double emitProb = model_->GetEmitProb(model_->emitProbVec[y], (begin + x)->rune, MIN_DOUBLE);
            for(size_t preY = 0; preY < Y; preY++) {
                const size_t old = x - 1 + preY * X;
                const double tmp = weight[old] + model_->transProb[preY][y] + emitProb;
                if(tmp > weight[now]) {
                    weight[now] = tmp;
                    path[now] = static_cast<int>(preY);
                }
            }
        }
    }

    // The last rune may only be in state E or S; pick the better of the two.
    const double endE = weight[X - 1 + HMMModel::E * X];
    const double endS = weight[X - 1 + HMMModel::S * X];
    size_t stat = (endE >= endS) ? HMMModel::E : HMMModel::S;

    // Backtrack with an unsigned index; the -1 sentinel stored in column 0 is
    // never loaded into `stat`.
    status.resize(X);
    for(size_t x = X; x-- > 0;) {
        status[x] = stat;
        if(x > 0) {
            stat = static_cast<size_t>(path[x + stat * X]);
        }
    }
}
status.resize(X);
for (int x = X -1 ; x >= 0; x--) {
status[x] = stat;
stat = path[x + stat*X];
}
}
const HMMModel* model_;
bool isNeedDestroy_;
const HMMModel* model_;
bool isNeedDestroy_;
}; // class HMMSegment
} // namespace cppjieba

View File

@ -25,123 +25,122 @@
namespace cppjieba {
class Jieba {
public:
Jieba(const string& dict_path,
const string& model_path,
const string& user_dict_path,
const string& idfPath,
const string& stopWordPath)
: dict_trie_(dict_path, user_dict_path),
model_(model_path),
mp_seg_(&dict_trie_),
hmm_seg_(&model_),
mix_seg_(&dict_trie_, &model_),
full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_),
extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
public:
// Loads one dictionary trie and one HMM model, then hands pointers to those
// two members to every segmenter and to the keyword extractor.
//   dict_path / user_dict_path : passed to the DictTrie member
//   model_path                 : passed to the HMMModel member
//   idfPath / stopWordPath     : passed to the KeywordExtractor member
// The segmenters receive &dict_trie_ / &model_, so dict_trie_ and model_ must
// be initialised before them.
Jieba(const string& dict_path,
const string& model_path,
const string& user_dict_path,
const string& idfPath,
const string& stopWordPath)
: dict_trie_(dict_path, user_dict_path),
model_(model_path),
mp_seg_(&dict_trie_),
hmm_seg_(&model_),
mix_seg_(&dict_trie_, &model_),
full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_),
extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
}
~Jieba() {
}
}
~Jieba() {
}
struct LocWord {
string word;
size_t begin;
size_t end;
}; // struct LocWord
// A word together with a begin/end position pair.
// NOTE(review): presumably begin/end locate the word inside the segmented
// sentence; the unit (bytes vs. runes) is not visible here - confirm.
struct LocWord {
string word;
size_t begin;
size_t end;
}; // struct LocWord
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm);
}
void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.Cut(sentence, words);
}
void CutAll(const string& sentence, vector<Word>& words) const {
full_seg_.Cut(sentence, words);
}
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm);
}
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm);
}
void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.Cut(sentence, words);
}
void CutHMM(const string& sentence, vector<Word>& words) const {
hmm_seg_.Cut(sentence, words);
}
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len);
}
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len);
}
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
mix_seg_.Tag(sentence, words);
}
string LookupTag(const string &str) const {
return mix_seg_.LookupTag(str);
}
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, tag);
}
// Segments `sentence` with mix_seg_; `hmm` is forwarded unchanged. Output: plain strings.
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm);
}
// Same as above, but the output is a list of Word records.
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
mix_seg_.Cut(sentence, words, hmm);
}
// Segments `sentence` with full_seg_. Output: plain strings.
void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.Cut(sentence, words);
}
// Same as above, but the output is a list of Word records.
void CutAll(const string& sentence, vector<Word>& words) const {
full_seg_.Cut(sentence, words);
}
// Segments `sentence` with query_seg_; `hmm` is forwarded unchanged. Output: plain strings.
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm);
}
// Same as above, but the output is a list of Word records.
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
query_seg_.Cut(sentence, words, hmm);
}
// Segments `sentence` with hmm_seg_ only. Output: plain strings.
void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.Cut(sentence, words);
}
// Same as above, but the output is a list of Word records.
void CutHMM(const string& sentence, vector<Word>& words) const {
hmm_seg_.Cut(sentence, words);
}
// Segments `sentence` with mp_seg_, forwarding `max_word_len`. Output: plain strings.
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len);
}
// Same as above, but the output is a list of Word records.
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
mp_seg_.Cut(sentence, words, max_word_len);
}
bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word,freq, tag);
}
// Forwards to mix_seg_.Tag, which fills `words` with (word, tag) pairs.
// The boolean returned by mix_seg_.Tag is discarded.
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
mix_seg_.Tag(sentence, words);
}
// Returns the tag mix_seg_.LookupTag reports for `str`.
string LookupTag(const string &str) const {
return mix_seg_.LookupTag(str);
}
// Forwards `word` and `tag` to dict_trie_.InsertUserWord and returns its result.
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, tag);
}
bool Find(const string& word)
{
return dict_trie_.Find(word);
}
// Overload that also forwards an explicit `freq` to dict_trie_.InsertUserWord;
// returns that call's result.
bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, freq, tag);
}
void ResetSeparators(const string& s) {
//TODO
mp_seg_.ResetSeparators(s);
hmm_seg_.ResetSeparators(s);
mix_seg_.ResetSeparators(s);
full_seg_.ResetSeparators(s);
query_seg_.ResetSeparators(s);
}
// Returns the result of dict_trie_.Find(word).
bool Find(const string& word) {
return dict_trie_.Find(word);
}
const DictTrie* GetDictTrie() const {
return &dict_trie_;
}
const HMMModel* GetHMMModel() const {
return &model_;
}
// Forwards the separator string `s` to each of the five segmenter members.
// NOTE(review): `extractor` is not touched here - confirm whether the
// segmenter it holds should receive the same separators.
void ResetSeparators(const string& s) {
//TODO
mp_seg_.ResetSeparators(s);
hmm_seg_.ResetSeparators(s);
mix_seg_.ResetSeparators(s);
full_seg_.ResetSeparators(s);
query_seg_.ResetSeparators(s);
}
void LoadUserDict(const vector<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
// Returns a pointer to the dict_trie_ member; the object stays owned by this Jieba.
const DictTrie* GetDictTrie() const {
return &dict_trie_;
}
void LoadUserDict(const set<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
// Returns a pointer to the model_ member; the object stays owned by this Jieba.
const HMMModel* GetHMMModel() const {
return &model_;
}
void LoadUserDict(const string& path) {
dict_trie_.LoadUserDict(path);
}
// Forwards the vector of strings `buf` to dict_trie_.LoadUserDict.
void LoadUserDict(const vector<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
private:
DictTrie dict_trie_;
HMMModel model_;
// They share the same dict trie and model
MPSegment mp_seg_;
HMMSegment hmm_seg_;
MixSegment mix_seg_;
FullSegment full_seg_;
QuerySegment query_seg_;
// Forwards the set of strings `buf` to dict_trie_.LoadUserDict.
void LoadUserDict(const set<string>& buf) {
dict_trie_.LoadUserDict(buf);
}
public:
KeywordExtractor extractor;
// Forwards `path` to dict_trie_.LoadUserDict.
void LoadUserDict(const string& path) {
dict_trie_.LoadUserDict(path);
}
private:
DictTrie dict_trie_;
HMMModel model_;
// They share the same dict trie and model
MPSegment mp_seg_;
HMMSegment hmm_seg_;
MixSegment mix_seg_;
FullSegment full_seg_;
QuerySegment query_seg_;
public:
KeywordExtractor extractor;
}; // class Jieba
} // namespace cppjieba

View File

@ -30,138 +30,138 @@ using namespace std;
/*utf8*/
class KeywordExtractor {
public:
struct Word {
string word;
vector<size_t> offsets;
double weight;
}; // struct Word
public:
// One extracted keyword.
//   word    : the keyword text
//   offsets : running offset at which each occurrence starts (accumulated from
//             the size() of the preceding segmented words)
//   weight  : occurrence count multiplied by the word's IDF value
struct Word {
string word;
vector<size_t> offsets;
double weight;
}; // struct Word
KeywordExtractor(const string& dictPath,
const string& hmmFilePath,
const string& idfPath,
const string& stopWordPath,
const string& userDict = "")
: segment_(dictPath, hmmFilePath, userDict) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
KeywordExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& idfPath,
const string& stopWordPath)
: segment_(dictTrie, model) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
~KeywordExtractor() {
}
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
// Builds an extractor whose segmenter is constructed from the dictionary, HMM
// and (optional) user-dictionary paths, then loads the IDF table and the
// stop-word list from the given files.
KeywordExtractor(const string& dictPath,
const string& hmmFilePath,
const string& idfPath,
const string& stopWordPath,
const string& userDict = "")
: segment_(dictPath, hmmFilePath, userDict) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
}
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
// Builds an extractor whose segmenter uses a dictionary trie and HMM model
// supplied by the caller, then loads the IDF table and the stop-word list
// from the given files.
KeywordExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& idfPath,
const string& stopWordPath)
: segment_(dictTrie, model) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
}
void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
vector<string> words;
segment_.Cut(sentence, words);
map<string, Word> wordmap;
size_t offset = 0;
for (size_t i = 0; i < words.size(); ++i) {
size_t t = offset;
offset += words[i].size();
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue;
}
wordmap[words[i]].offsets.push_back(t);
wordmap[words[i]].weight += 1.0;
}
if (offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
~KeywordExtractor() {
}
keywords.clear();
keywords.reserve(wordmap.size());
for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
if (cit != idfMap_.end()) {
itr->second.weight *= cit->second;
} else {
itr->second.weight *= idfAverage_;
}
itr->second.word = itr->first;
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
}
private:
void LoadIdfDict(const string& idfPath) {
ifstream ifs(idfPath.c_str());
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
string line ;
vector<string> buf;
double idf = 0.0;
double idfSum = 0.0;
size_t lineno = 0;
for (; getline(ifs, line); lineno++) {
buf.clear();
if (line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
if (buf.size() != 2) {
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
continue;
}
idf = atof(buf[1].c_str());
idfMap_[buf[0]] = idf;
idfSum += idf;
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
}
}
assert(lineno);
idfAverage_ = idfSum / lineno;
assert(idfAverage_ > 0.0);
}
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while (getline(ifs, line)) {
stopWords_.insert(line);
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
}
}
assert(stopWords_.size());
}
static bool Compare(const Word& lhs, const Word& rhs) {
return lhs.weight > rhs.weight;
}
void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
vector<string> words;
segment_.Cut(sentence, words);
MixSegment segment_;
unordered_map<string, double> idfMap_;
double idfAverage_;
map<string, Word> wordmap;
size_t offset = 0;
for(size_t i = 0; i < words.size(); ++i) {
size_t t = offset;
offset += words[i].size();
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue;
}
wordmap[words[i]].offsets.push_back(t);
wordmap[words[i]].weight += 1.0;
}
if(offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
}
unordered_set<string> stopWords_;
keywords.clear();
keywords.reserve(wordmap.size());
for(map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
if(cit != idfMap_.end()) {
itr->second.weight *= cit->second;
} else {
itr->second.weight *= idfAverage_;
}
itr->second.word = itr->first;
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
}
private:
void LoadIdfDict(const string& idfPath) {
ifstream ifs(idfPath.c_str());
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
string line ;
vector<string> buf;
double idf = 0.0;
double idfSum = 0.0;
size_t lineno = 0;
for(; getline(ifs, line); lineno++) {
buf.clear();
if(line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
if(buf.size() != 2) {
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
continue;
}
idf = atof(buf[1].c_str());
idfMap_[buf[0]] = idf;
idfSum += idf;
}
assert(lineno);
idfAverage_ = idfSum / lineno;
assert(idfAverage_ > 0.0);
}
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while(getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
// Ordering predicate for sorting keywords: true when `lhs` has the larger
// weight, so heavier words sort first.
static bool Compare(const Word& lhs, const Word& rhs) {
return lhs.weight > rhs.weight;
}
MixSegment segment_;
unordered_map<string, double> idfMap_;
double idfAverage_;
unordered_set<string> stopWords_;
}; // class KeywordExtractor
inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
}
} // namespace cppjieba

View File

@ -30,123 +30,123 @@
namespace cppjieba {
class MPSegment: public SegmentTagged {
public:
MPSegment(const string& dictPath, const string& userDictPath = "")
: dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
}
MPSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) {
assert(dictTrie_);
}
~MPSegment() {
if (isNeedDestroy_) {
delete dictTrie_;
public:
// Allocates a DictTrie from the given dictionary paths. isNeedDestroy_ is set
// to true so that the destructor deletes the trie created here.
MPSegment(const string& dictPath, const string& userDictPath = "")
: dictTrie_(new DictTrie(dictPath, userDictPath)), isNeedDestroy_(true) {
}
}
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, MAX_WORD_LENGTH);
}
void Cut(const string& sentence,
vector<string>& words,
size_t max_word_len) const {
vector<Word> tmp;
Cut(sentence, tmp, max_word_len);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, max_word_len);
// Uses a DictTrie supplied by the caller; it must be non-null (asserted).
// isNeedDestroy_ is false, so the destructor leaves the trie alone.
MPSegment(const DictTrie* dictTrie)
: dictTrie_(dictTrie), isNeedDestroy_(false) {
assert(dictTrie_);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<WordRange>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
vector<Dag> dags;
dictTrie_->Find(begin,
end,
dags,
max_word_len);
CalcDP(dags);
CutByDag(begin, end, dags, words);
}
const DictTrie* GetDictTrie() const {
return dictTrie_;
}
bool Tag(const string& src, vector<pair<string, string> >& res) const {
return tagger_.Tag(src, res, *this);
}
bool IsUserDictSingleChineseWord(const Rune& value) const {
return dictTrie_->IsUserDictSingleChineseWord(value);
}
private:
void CalcDP(vector<Dag>& dags) const {
size_t nextPos;
const DictUnit* p;
double val;
for (vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
rit->pInfo = NULL;
rit->weight = MIN_DOUBLE;
assert(!rit->nexts.empty());
for (LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
nextPos = it->first;
p = it->second;
val = 0.0;
if (nextPos + 1 < dags.size()) {
val += dags[nextPos + 1].weight;
~MPSegment() {
if(isNeedDestroy_) {
delete dictTrie_;
}
if (p) {
val += p->weight;
} else {
val += dictTrie_->GetMinWeight();
}
if (val > rit->weight) {
rit->pInfo = p;
rit->weight = val;
}
}
}
}
void CutByDag(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
const vector<Dag>& dags,
vector<WordRange>& words) const {
size_t i = 0;
while (i < dags.size()) {
const DictUnit* p = dags[i].pInfo;
if (p) {
assert(p->word.size() >= 1);
WordRange wr(begin + i, begin + i + p->word.size() - 1);
words.push_back(wr);
i += p->word.size();
} else { //single chinese word
WordRange wr(begin + i, begin + i);
words.push_back(wr);
i++;
}
}
}
const DictTrie* dictTrie_;
bool isNeedDestroy_;
PosTagger tagger_;
// Convenience overload: segments `sentence` using MAX_WORD_LENGTH as the word-length limit.
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, MAX_WORD_LENGTH);
}
void Cut(const string& sentence,
vector<string>& words,
size_t max_word_len) const {
vector<Word> tmp;
Cut(sentence, tmp, max_word_len);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence,
vector<Word>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, max_word_len);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
         RuneStrArray::const_iterator end,
         vector<WordRange>& words,
         size_t max_word_len = MAX_WORD_LENGTH) const {
    // 1) let the trie fill one Dag entry per position, 2) score the entries,
    // 3) emit the chosen words into `words`.
    vector<Dag> graph;
    dictTrie_->Find(begin, end, graph, max_word_len);
    CalcDP(graph);
    CutByDag(begin, end, graph, words);
}
// Returns the dictionary trie this segmenter works with.
const DictTrie* GetDictTrie() const {
return dictTrie_;
}
// Delegates to tagger_.Tag, passing this segmenter; returns that call's result.
bool Tag(const string& src, vector<pair<string, string> >& res) const {
return tagger_.Tag(src, res, *this);
}
// Returns the result of dictTrie_->IsUserDictSingleChineseWord(value).
bool IsUserDictSingleChineseWord(const Rune& value) const {
return dictTrie_->IsUserDictSingleChineseWord(value);
}
private:
void CalcDP(vector<Dag>& dags) const {
    typedef LocalVector<pair<size_t, const DictUnit*> >::const_iterator EdgeIter;
    // Walk the positions back to front so that the score of everything after a
    // candidate word is already known when that candidate is evaluated.
    for(vector<Dag>::reverse_iterator node = dags.rbegin(); node != dags.rend(); ++node) {
        node->pInfo = NULL;
        node->weight = MIN_DOUBLE;
        assert(!node->nexts.empty());
        for(EdgeIter edge = node->nexts.begin(); edge != node->nexts.end(); ++edge) {
            const size_t lastPos = edge->first;
            const DictUnit* unit = edge->second;
            // Score = best score of the remainder + weight of this candidate.
            double score = 0.0;
            if(lastPos + 1 < dags.size()) {
                score += dags[lastPos + 1].weight;
            }
            // Candidates without a dictionary entry get the trie's minimum weight.
            score += unit ? unit->weight : dictTrie_->GetMinWeight();
            if(score > node->weight) {
                node->pInfo = unit;
                node->weight = score;
            }
        }
    }
}
void CutByDag(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              const vector<Dag>& dags,
              vector<WordRange>& words) const {
    // Follow the choice stored at each position, jumping over the runes that
    // the chosen word covers.
    for(size_t pos = 0; pos < dags.size(); ) {
        const DictUnit* unit = dags[pos].pInfo;
        // Without a dictionary entry the position yields a one-rune word.
        size_t len = 1;
        if(unit) {
            assert(unit->word.size() >= 1);
            len = unit->word.size();
        }
        // WordRange takes the first and the last rune of the word.
        words.push_back(WordRange(begin + pos, begin + pos + len - 1));
        pos += len;
    }
}
const DictTrie* dictTrie_;
bool isNeedDestroy_;
PosTagger tagger_;
}; // class MPSegment

View File

@ -27,98 +27,98 @@
namespace cppjieba {
class MixSegment: public SegmentTagged {
public:
MixSegment(const string& mpSegDict, const string& hmmSegDict,
const string& userDict = "")
: mpSeg_(mpSegDict, userDict),
hmmSeg_(hmmSegDict) {
}
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
: mpSeg_(dictTrie), hmmSeg_(model) {
}
~MixSegment() {
}
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, true);
}
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while (pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
public:
// Builds both inner segmenters from file paths: mpSeg_ from the dictionary
// (plus optional user dictionary) and hmmSeg_ from the HMM model file.
MixSegment(const string& mpSegDict, const string& hmmSegDict,
const string& userDict = "")
: mpSeg_(mpSegDict, userDict),
hmmSeg_(hmmSegDict) {
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
if (!hmm) {
mpSeg_.Cut(begin, end, res);
return;
// Builds both inner segmenters on top of a dictionary trie and an HMM model
// supplied by the caller.
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
: mpSeg_(dictTrie), hmmSeg_(model) {
}
vector<WordRange> words;
assert(end >= begin);
words.reserve(end - begin);
mpSeg_.Cut(begin, end, words);
vector<WordRange> hmmRes;
hmmRes.reserve(end - begin);
for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result
if (words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
res.push_back(words[i]);
continue;
}
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i;
while (j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(hmmRes[k]);
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
~MixSegment() {
}
}
const DictTrie* GetDictTrie() const {
return mpSeg_.GetDictTrie();
}
// Convenience overload: segments `sentence` with the HMM step enabled.
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, true);
}
// Segments `sentence` into Word records and then converts them to plain
// strings; `hmm` is forwarded unchanged.
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
bool Tag(const string& src, vector<pair<string, string> >& res) const {
return tagger_.Tag(src, res, *this);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
if(!hmm) {
mpSeg_.Cut(begin, end, res);
return;
}
vector<WordRange> words;
assert(end >= begin);
words.reserve(end - begin);
mpSeg_.Cut(begin, end, words);
string LookupTag(const string &str) const {
return tagger_.LookupTag(str, *this);
}
vector<WordRange> hmmRes;
hmmRes.reserve(end - begin);
for(size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result
if(words[i].left != words[i].right || (words[i].left == words[i].right && mpSeg_.IsUserDictSingleChineseWord(words[i].left->rune))) {
res.push_back(words[i]);
continue;
}
private:
MPSegment mpSeg_;
HMMSegment hmmSeg_;
PosTagger tagger_;
// if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i;
while(j < words.size() && words[j].left == words[j].right && !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++;
}
// Cut the sequence with hmm
assert(j - 1 >= i);
// TODO
hmmSeg_.Cut(words[i].left, words[j - 1].left + 1, hmmRes);
//put hmm result to result
for(size_t k = 0; k < hmmRes.size(); k++) {
res.push_back(hmmRes[k]);
}
//clear tmp vars
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
}
// Returns the dictionary trie used by the inner mpSeg_.
const DictTrie* GetDictTrie() const {
return mpSeg_.GetDictTrie();
}
// Delegates to tagger_.Tag, passing this segmenter; returns that call's result.
bool Tag(const string& src, vector<pair<string, string> >& res) const {
return tagger_.Tag(src, res, *this);
}
// Delegates to tagger_.LookupTag, passing this segmenter; returns the tag for `str`.
string LookupTag(const string &str) const {
return tagger_.LookupTag(str, *this);
}
private:
MPSegment mpSeg_;
HMMSegment hmmSeg_;
PosTagger tagger_;
}; // class MixSegment

View File

@ -31,62 +31,62 @@ static const char* const POS_ENG = "eng";
static const char* const POS_X = "x";
class PosTagger {
public:
PosTagger() {
}
~PosTagger() {
}
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
vector<string> CutRes;
segment.Cut(src, CutRes);
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
public:
PosTagger() {
}
~PosTagger() {
}
return !res.empty();
}
string LookupTag(const string &str, const SegmentTagged& segment) const {
const DictUnit *tmp = NULL;
RuneStrArray runes;
const DictTrie * dict = segment.GetDictTrie();
assert(dict != NULL);
if (!DecodeRunesInString(str, runes)) {
XLOG(ERROR) << "Decode failed.";
return POS_X;
}
tmp = dict->Find(runes.begin(), runes.end());
if (tmp == NULL || tmp->tag.empty()) {
return SpecialRule(runes);
} else {
return tmp->tag;
}
}
bool Tag(const string& src, vector<pair<string, string> >& res, const SegmentTagged& segment) const {
vector<string> CutRes;
segment.Cut(src, CutRes);
private:
const char* SpecialRule(const RuneStrArray& unicode) const {
size_t m = 0;
size_t eng = 0;
for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
if (unicode[i].rune < 0x80) {
eng ++;
if ('0' <= unicode[i].rune && unicode[i].rune <= '9') {
m++;
for(vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
}
}
return !res.empty();
}
// ascii char is not found
if (eng == 0) {
return POS_X;
string LookupTag(const string &str, const SegmentTagged& segment) const {
    RuneStrArray decoded;
    const DictTrie* trie = segment.GetDictTrie();
    assert(trie != NULL);
    // Text that cannot be decoded gets the POS_X tag.
    if(!DecodeRunesInString(str, decoded)) {
        XLOG(ERROR) << "Decode failed.";
        return POS_X;
    }
    // Prefer the tag stored in the dictionary; fall back to the character-class
    // heuristic when the word is unknown or carries no tag.
    const DictUnit* hit = trie->Find(decoded.begin(), decoded.end());
    if(hit != NULL && !hit->tag.empty()) {
        return hit->tag;
    }
    return SpecialRule(decoded);
}
// all the ascii is number char
if (m == eng) {
return POS_M;
private:
const char* SpecialRule(const RuneStrArray& unicode) const {
size_t m = 0;
size_t eng = 0;
for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
if(unicode[i].rune < 0x80) {
eng ++;
if('0' <= unicode[i].rune && unicode[i].rune <= '9') {
m++;
}
}
}
// ascii char is not found
if(eng == 0) {
return POS_X;
}
// all the ascii is number char
if(m == eng) {
return POS_M;
}
// the ascii chars contain english letter
return POS_ENG;
}
// the ascii chars contain english letter
return POS_ENG;
}
}; // class PosTagger

View File

@ -25,46 +25,46 @@
namespace cppjieba {
class PreFilter {
public:
//TODO use WordRange instead of Range
struct Range {
RuneStrArray::const_iterator begin;
RuneStrArray::const_iterator end;
}; // struct Range
public:
//TODO use WordRange instead of Range
// A pair of iterators into the decoded sentence, as returned by Next():
// `begin` is the first rune of the piece, `end` is one past its last rune.
struct Range {
RuneStrArray::const_iterator begin;
RuneStrArray::const_iterator end;
}; // struct Range
PreFilter(const unordered_set<Rune>& symbols,
const string& sentence)
: symbols_(symbols) {
if (!DecodeRunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. ";
}
cursor_ = sentence_.begin();
}
~PreFilter() {
}
bool HasNext() const {
return cursor_ != sentence_.end();
}
Range Next() {
Range range;
range.begin = cursor_;
while (cursor_ != sentence_.end()) {
if (IsIn(symbols_, cursor_->rune)) {
if (range.begin == cursor_) {
cursor_ ++;
PreFilter(const unordered_set<Rune>& symbols,
const string& sentence)
: symbols_(symbols) {
if(!DecodeRunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. ";
}
range.end = cursor_;
return range;
}
cursor_ ++;
cursor_ = sentence_.begin();
}
range.end = sentence_.end();
return range;
}
private:
RuneStrArray::const_iterator cursor_;
RuneStrArray sentence_;
const unordered_set<Rune>& symbols_;
~PreFilter() {
}
bool HasNext() const {
return cursor_ != sentence_.end();
}
// Returns the next piece of the sentence and advances the cursor past it.
//
// A piece is either a single separator rune (when the cursor currently sits
// on one) or the run of non-separator runes up to, but not including, the
// next separator. If no separator is found the piece extends to the end of
// the sentence. The returned range is half-open: [begin, end).
Range Next() {
    Range piece;
    piece.begin = cursor_;
    for(; cursor_ != sentence_.end(); ++cursor_) {
        if(!IsIn(symbols_, cursor_->rune)) {
            continue;
        }
        // Separator found. If it is the very first rune of this piece,
        // consume it so that it is emitted as a piece of its own.
        if(piece.begin == cursor_) {
            ++cursor_;
        }
        piece.end = cursor_;
        return piece;
    }
    piece.end = sentence_.end();
    return piece;
}
private:
RuneStrArray::const_iterator cursor_;
RuneStrArray sentence_;
const unordered_set<Rune>& symbols_;
}; // class PreFilter
} // namespace cppjieba

View File

@ -31,75 +31,75 @@
namespace cppjieba {
class QuerySegment: public SegmentBase {
public:
QuerySegment(const string& dict, const string& model, const string& userDict = "")
: mixSeg_(dict, model, userDict),
trie_(mixSeg_.GetDictTrie()) {
}
QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
: mixSeg_(dictTrie, model), trie_(dictTrie) {
}
~QuerySegment() {
}
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, true);
}
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
public:
QuerySegment(const string& dict, const string& model, const string& userDict = "")
: mixSeg_(dict, model, userDict),
trie_(mixSeg_.GetDictTrie()) {
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
//use mix Cut first
vector<WordRange> mixRes;
mixSeg_.Cut(begin, end, mixRes, hmm);
vector<WordRange> fullRes;
for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
if (mixResItr->Length() > 2) {
for (size_t i = 0; i + 1 < mixResItr->Length(); i++) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 1);
if (trie_->Find(wr.left, wr.right + 1) != NULL) {
res.push_back(wr);
}
}
}
if (mixResItr->Length() > 3) {
for (size_t i = 0; i + 2 < mixResItr->Length(); i++) {
WordRange wr(mixResItr->left + i, mixResItr->left + i + 2);
if (trie_->Find(wr.left, wr.right + 1) != NULL) {
res.push_back(wr);
}
}
}
res.push_back(*mixResItr);
QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
: mixSeg_(dictTrie, model), trie_(dictTrie) {
}
}
private:
bool IsAllAscii(const Unicode& s) const {
for(size_t i = 0; i < s.size(); i++) {
if (s[i] >= 0x80) {
return false;
}
}
return true;
}
MixSegment mixSeg_;
const DictTrie* trie_;
~QuerySegment() {
}
void Cut(const string& sentence, vector<string>& words) const {
Cut(sentence, words, true);
}
void Cut(const string& sentence, vector<string>& words, bool hmm) const {
vector<Word> tmp;
Cut(sentence, tmp, hmm);
GetStringsFromWords(tmp, words);
}
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2);
while(pre_filter.HasNext()) {
range = pre_filter.Next();
Cut(range.begin, range.end, wrs, hmm);
}
words.clear();
words.reserve(wrs.size());
GetWordsFromWordRanges(sentence, wrs, words);
}
// Cuts the rune range [begin, end) for query/search use and appends the
// result to `res`.
//
// The range is first cut with the mixed segmenter. For every resulting word,
// dictionary sub-words are emitted in addition to the word itself:
//   * words longer than 2 runes: every 2-rune window found in the trie
//   * words longer than 3 runes: every 3-rune window found in the trie
// Output order per word is: all 2-rune hits (left to right), then all 3-rune
// hits (left to right), then the word itself. `res` is appended to, never
// cleared. `hmm` is forwarded to the mixed segmenter.
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
    //use mix Cut first
    vector<WordRange> mixRes;
    mixSeg_.Cut(begin, end, mixRes, hmm);

    for(vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); ++mixResItr) {
        const size_t length = mixResItr->Length();
        // Window widths 2 and 3; a width is only tried on strictly longer words.
        for(size_t width = 2; width <= 3; ++width) {
            if(length <= width) {
                continue;
            }
            for(size_t i = 0; i + width - 1 < length; i++) {
                // WordRange is inclusive on both ends, the trie lookup takes a
                // half-open iterator range, hence the "+ 1" on the right side.
                WordRange wr(mixResItr->left + i, mixResItr->left + i + width - 1);
                if(trie_->Find(wr.left, wr.right + 1) != NULL) {
                    res.push_back(wr);
                }
            }
        }
        res.push_back(*mixResItr);
    }
}
private:
bool IsAllAscii(const Unicode& s) const {
for(size_t i = 0; i < s.size(); i++) {
if(s[i] >= 0x80) {
return false;
}
}
return true;
}
MixSegment mixSeg_;
const DictTrie* trie_;
}; // QuerySegment
} // namespace cppjieba

View File

@ -31,32 +31,32 @@ const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
using namespace limonp;
class SegmentBase {
public:
SegmentBase() {
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
}
virtual ~SegmentBase() {
}
virtual void Cut(const string& sentence, vector<string>& words) const = 0;
bool ResetSeparators(const string& s) {
symbols_.clear();
RuneStrArray runes;
if (!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
return false;
public:
SegmentBase() {
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
}
for (size_t i = 0; i < runes.size(); i++) {
if (!symbols_.insert(runes[i].rune).second) {
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
return false;
}
virtual ~SegmentBase() {
}
return true;
}
protected:
unordered_set<Rune> symbols_;
virtual void Cut(const string& sentence, vector<string>& words) const = 0;
bool ResetSeparators(const string& s) {
symbols_.clear();
RuneStrArray runes;
if(!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
return false;
}
for(size_t i = 0; i < runes.size(); i++) {
if(!symbols_.insert(runes[i].rune).second) {
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
return false;
}
}
return true;
}
protected:
unordered_set<Rune> symbols_;
}; // class SegmentBase
} // cppjieba

View File

@ -23,16 +23,16 @@
namespace cppjieba {
class SegmentTagged : public SegmentBase{
public:
SegmentTagged() {
}
virtual ~SegmentTagged() {
}
class SegmentTagged : public SegmentBase {
public:
SegmentTagged() {
}
virtual ~SegmentTagged() {
}
virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
virtual bool Tag(const string& src, vector<pair<string, string> >& res) const = 0;
virtual const DictTrie* GetDictTrie() const = 0;
virtual const DictTrie* GetDictTrie() const = 0;
}; // class SegmentTagged

View File

@ -23,100 +23,104 @@
#include "Jieba.hpp"
namespace cppjieba {
using namespace limonp;
using namespace std;
using namespace limonp;
using namespace std;
class TextRankExtractor {
public:
typedef struct _Word {string word;vector<size_t> offsets;double weight;} Word; // struct Word
private:
typedef std::map<string,Word> WordMap;
class WordGraph{
class TextRankExtractor {
public:
typedef struct _Word {
string word;
vector<size_t> offsets;
double weight;
} Word; // struct Word
private:
typedef std::map<string, Word> WordMap;
class WordGraph {
private:
typedef double Score;
typedef string Node;
typedef std::set<Node> NodeSet;
typedef double Score;
typedef string Node;
typedef std::set<Node> NodeSet;
typedef std::map<Node,double> Edges;
typedef std::map<Node,Edges> Graph;
//typedef std::unordered_map<Node,double> Edges;
//typedef std::unordered_map<Node,Edges> Graph;
typedef std::map<Node, double> Edges;
typedef std::map<Node, Edges> Graph;
//typedef std::unordered_map<Node,double> Edges;
//typedef std::unordered_map<Node,Edges> Graph;
double d;
Graph graph;
NodeSet nodeSet;
double d;
Graph graph;
NodeSet nodeSet;
public:
WordGraph(): d(0.85) {};
WordGraph(double in_d): d(in_d) {};
WordGraph(): d(0.85) {};
WordGraph(double in_d): d(in_d) {};
void addEdge(Node start,Node end,double weight){
Edges temp;
Edges::iterator gotEdges;
nodeSet.insert(start);
nodeSet.insert(end);
graph[start][end]+=weight;
graph[end][start]+=weight;
}
void rank(WordMap &ws,size_t rankTime=10){
WordMap outSum;
Score wsdef, min_rank, max_rank;
if( graph.size() == 0)
return;
wsdef = 1.0 / graph.size();
for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
// edges->first start节点edge->first end节点edge->second 权重
ws[edges->first].word=edges->first;
ws[edges->first].weight=wsdef;
outSum[edges->first].weight=0;
for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
outSum[edges->first].weight+=edge->second;
}
}
//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
for( size_t i=0; i<rankTime; i++ ){
for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
double s = 0;
for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
// edge->first end节点edge->second 权重
s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
ws[*node].weight = (1 - d) + d * s;
}
// Adds `weight` to the undirected edge between `start` and `end`.
//
// Both endpoints are registered in nodeSet and the weight is accumulated in
// both directions of the adjacency map, so repeated calls for the same pair
// add up. When start == end the single self-edge entry receives the weight
// twice.
void addEdge(Node start, Node end, double weight) {
    nodeSet.insert(start);
    nodeSet.insert(end);
    graph[start][end] += weight;
    graph[end][start] += weight;
}
min_rank=max_rank=ws.begin()->second.weight;
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
if( i->second.weight < min_rank ){
min_rank = i->second.weight;
}
if( i->second.weight > max_rank ){
max_rank = i->second.weight;
}
// Runs `rankTime` rounds of weighted rank propagation over the graph and
// writes the normalised scores into `ws`.
//
// Steps:
//   1. Every graph node gets an entry in `ws` with its word set and an
//      initial weight of 1 / (number of graph nodes); the sum of its outgoing
//      edge weights is recorded.
//   2. For each round and each node: weight = (1 - d) + d * sum over
//      neighbours of (edge weight / neighbour's out-sum * neighbour's weight).
//      Updates are applied in place, so later nodes in a round already see
//      the new weights of earlier ones.
//   3. All entries of `ws` (including any that the caller put there before
//      the call) are rescaled with
//      (w - min / 10) / (max - min / 10).
// Does nothing when the graph is empty.
// NOTE(review): step 3 divides by (max - min / 10) without a zero check.
void rank(WordMap &ws, size_t rankTime = 10) {
    if(graph.size() == 0)
        return;

    const Score wsdef = 1.0 / graph.size();
    // Only the summed outgoing weight per node is needed here.
    std::map<Node, Score> outSum;
    for(Graph::iterator edges = graph.begin(); edges != graph.end(); ++edges) {
        // edges->first: start node; edge->first: end node; edge->second: weight
        Word &entry = ws[edges->first];
        entry.word = edges->first;
        entry.weight = wsdef;
        Score &total = outSum[edges->first];
        total = 0;
        for(Edges::iterator edge = edges->second.begin(); edge != edges->second.end(); ++edge) {
            total += edge->second;
        }
    }

    //sort(nodeSet.begin(),nodeSet.end()); is sorting needed here?
    for(size_t i = 0; i < rankTime; i++) {
        for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); ++node) {
            // Look the adjacency list up once instead of once per loop test.
            Edges &adjacent = graph[*node];
            double s = 0;
            for(Edges::iterator edge = adjacent.begin(); edge != adjacent.end(); ++edge) {
                // edge->first: end node; edge->second: weight
                s += edge->second / outSum[edge->first] * ws[edge->first].weight;
            }
            ws[*node].weight = (1 - d) + d * s;
        }
    }

    Score min_rank, max_rank;
    min_rank = max_rank = ws.begin()->second.weight;
    for(WordMap::iterator i = ws.begin(); i != ws.end(); ++i) {
        if(i->second.weight < min_rank) {
            min_rank = i->second.weight;
        }
        if(i->second.weight > max_rank) {
            max_rank = i->second.weight;
        }
    }

    const Score base = min_rank / 10.0;
    const Score span = max_rank - base;
    for(WordMap::iterator i = ws.begin(); i != ws.end(); ++i) {
        i->second.weight = (i->second.weight - base) / span;
    }
}
for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
}
}
};
public:
TextRankExtractor(const string& dictPath,
const string& hmmFilePath,
const string& stopWordPath,
const string& userDict = "")
: segment_(dictPath, hmmFilePath, userDict) {
LoadStopWordDict(stopWordPath);
}
TextRankExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& stopWordPath)
: segment_(dictTrie, model) {
LoadStopWordDict(stopWordPath);
}
public:
TextRankExtractor(const string& dictPath,
const string& hmmFilePath,
const string& stopWordPath,
const string& userDict = "")
: segment_(dictPath, hmmFilePath, userDict) {
LoadStopWordDict(stopWordPath);
}
TextRankExtractor(const DictTrie* dictTrie,
const HMMModel* model,
const string& stopWordPath)
: segment_(dictTrie, model) {
LoadStopWordDict(stopWordPath);
}
TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
LoadStopWordDict(stopWordPath);
}
@ -124,83 +128,83 @@ namespace cppjieba {
}
void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
}
vector<Word> topWords;
Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(topWords[i].word);
}
}
void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<Word> topWords;
Extract(sentence, topWords, topN);
for (size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
}
vector<Word> topWords;
Extract(sentence, topWords, topN);
for(size_t i = 0; i < topWords.size(); i++) {
keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
}
}
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
vector<string> words;
segment_.Cut(sentence, words);
void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span = 5, size_t rankTime = 10) const {
vector<string> words;
segment_.Cut(sentence, words);
TextRankExtractor::WordGraph graph;
WordMap wordmap;
size_t offset = 0;
TextRankExtractor::WordGraph graph;
WordMap wordmap;
size_t offset = 0;
for(size_t i=0; i < words.size(); i++){
size_t t = offset;
offset += words[i].size();
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue;
for(size_t i = 0; i < words.size(); i++) {
size_t t = offset;
offset += words[i].size();
if(IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue;
}
for(size_t j = i + 1, skip = 0; j < i + span + skip && j < words.size(); j++) {
if(IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
skip++;
continue;
}
graph.addEdge(words[i], words[j], 1);
}
wordmap[words[i]].offsets.push_back(t);
}
for(size_t j=i+1,skip=0;j<i+span+skip && j<words.size();j++){
if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
skip++;
continue;
}
graph.addEdge(words[i],words[j],1);
if(offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
}
wordmap[words[i]].offsets.push_back(t);
}
if (offset != sentence.size()) {
XLOG(ERROR) << "words illegal";
return;
}
graph.rank(wordmap,rankTime);
keywords.clear();
keywords.reserve(wordmap.size());
for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
graph.rank(wordmap, rankTime);
keywords.clear();
keywords.reserve(wordmap.size());
for(WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
keywords.push_back(itr->second);
}
topN = min(topN, keywords.size());
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
keywords.resize(topN);
}
private:
private:
void LoadStopWordDict(const string& filePath) {
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while (getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed";
string line ;
while(getline(ifs, line)) {
stopWords_.insert(line);
}
assert(stopWords_.size());
}
static bool Compare(const Word &x,const Word &y){
return x.weight > y.weight;
static bool Compare(const Word &x, const Word &y) {
return x.weight > y.weight;
}
MixSegment segment_;
unordered_set<string> stopWords_;
}; // class TextRankExtractor
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
}
}; // class TextRankExtractor
inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
}
} // namespace cppjieba
#endif

View File

@ -31,9 +31,9 @@ using namespace std;
const size_t MAX_WORD_LENGTH = 512;
struct DictUnit {
Unicode word;
double weight;
string tag;
Unicode word;
double weight;
string tag;
}; // struct DictUnit
// for debugging
@ -44,148 +44,148 @@ struct DictUnit {
// }
struct Dag {
RuneStr runestr;
// [offset, nexts.first]
limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
const DictUnit * pInfo;
double weight;
size_t nextPos; // TODO
Dag():runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
}
RuneStr runestr;
// [offset, nexts.first]
limonp::LocalVector<pair<size_t, const DictUnit*> > nexts;
const DictUnit * pInfo;
double weight;
size_t nextPos; // TODO
Dag(): runestr(), pInfo(NULL), weight(0.0), nextPos(0) {
}
}; // struct Dag
typedef Rune TrieKey;
class TrieNode {
public :
TrieNode(): next(NULL), ptValue(NULL) {
}
public:
typedef unordered_map<TrieKey, TrieNode*> NextMap;
NextMap *next;
const DictUnit *ptValue;
public :
TrieNode(): next(NULL), ptValue(NULL) {
}
public:
typedef unordered_map<TrieKey, TrieNode*> NextMap;
NextMap *next;
const DictUnit *ptValue;
};
class Trie {
public:
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
: root_(new TrieNode) {
CreateTrie(keys, valuePointers);
}
~Trie() {
DeleteNode(root_);
}
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
if (begin == end) {
return NULL;
public:
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
: root_(new TrieNode) {
CreateTrie(keys, valuePointers);
}
~Trie() {
DeleteNode(root_);
}
const TrieNode* ptNode = root_;
TrieNode::NextMap::const_iterator citer;
for (RuneStrArray::const_iterator it = begin; it != end; it++) {
if (NULL == ptNode->next) {
return NULL;
}
citer = ptNode->next->find(it->rune);
if (ptNode->next->end() == citer) {
return NULL;
}
ptNode = citer->second;
}
return ptNode->ptValue;
}
void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,
vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const {
assert(root_ != NULL);
res.resize(end - begin);
const TrieNode *ptNode = NULL;
TrieNode::NextMap::const_iterator citer;
for (size_t i = 0; i < size_t(end - begin); i++) {
res[i].runestr = *(begin + i);
if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
ptNode = citer->second;
} else {
ptNode = NULL;
}
if (ptNode != NULL) {
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));
} else {
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, static_cast<const DictUnit*>(NULL)));
}
for (size_t j = i + 1; j < size_t(end - begin) && (j - i + 1) <= max_word_len; j++) {
if (ptNode == NULL || ptNode->next == NULL) {
break;
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
if(begin == end) {
return NULL;
}
citer = ptNode->next->find((begin + j)->rune);
if (ptNode->next->end() == citer) {
break;
const TrieNode* ptNode = root_;
TrieNode::NextMap::const_iterator citer;
for(RuneStrArray::const_iterator it = begin; it != end; it++) {
if(NULL == ptNode->next) {
return NULL;
}
citer = ptNode->next->find(it->rune);
if(ptNode->next->end() == citer) {
return NULL;
}
ptNode = citer->second;
}
ptNode = citer->second;
if (NULL != ptNode->ptValue) {
res[i].nexts.push_back(pair<size_t, const DictUnit*>(j, ptNode->ptValue));
return ptNode->ptValue;
}
// Builds the word DAG for the rune range [begin, end).
//
// `res` is resized to one Dag per rune. For position i, res[i].runestr is the
// rune itself and res[i].nexts lists (j, unit) pairs, where j is the index of
// the last rune of a dictionary word starting at i and unit is its entry.
// The first pair is always (i, unit-or-NULL) for the single rune; longer
// matches follow in increasing j and are limited to `max_word_len` runes.
// Pairs are appended to res[i].nexts; existing contents are not cleared here.
void Find(RuneStrArray::const_iterator begin,
          RuneStrArray::const_iterator end,
          vector<struct Dag>&res,
          size_t max_word_len = MAX_WORD_LENGTH) const {
    assert(root_ != NULL);
    res.resize(end - begin);

    const size_t total = size_t(end - begin);
    for(size_t start = 0; start < total; ++start) {
        res[start].runestr = *(begin + start);

        // Step from the root onto the node of the first rune, if there is one.
        const TrieNode *node = NULL;
        if(root_->next != NULL) {
            TrieNode::NextMap::const_iterator first = root_->next->find(res[start].runestr.rune);
            if(first != root_->next->end()) {
                node = first->second;
            }
        }

        // The single rune is always a candidate, with or without an entry.
        if(node == NULL) {
            res[start].nexts.push_back(pair<size_t, const DictUnit*>(start, static_cast<const DictUnit*>(NULL)));
        } else {
            res[start].nexts.push_back(pair<size_t, const DictUnit*>(start, node->ptValue));
        }

        // Walk further down the trie, recording every prefix that is a word.
        for(size_t stop = start + 1; stop < total && (stop - start + 1) <= max_word_len; ++stop) {
            if(node == NULL || node->next == NULL) {
                break;
            }
            TrieNode::NextMap::const_iterator step = node->next->find((begin + stop)->rune);
            if(step == node->next->end()) {
                break;
            }
            node = step->second;
            if(node->ptValue != NULL) {
                res[start].nexts.push_back(pair<size_t, const DictUnit*>(stop, node->ptValue));
            }
        }
    }
}
}
}
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {
return;
}
TrieNode::NextMap::const_iterator kmIter;
TrieNode *ptNode = root_;
for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
if (NULL == ptNode->next) {
ptNode->next = new TrieNode::NextMap;
}
kmIter = ptNode->next->find(*citer);
if (ptNode->next->end() == kmIter) {
TrieNode *nextNode = new TrieNode;
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
if(key.begin() == key.end()) {
return;
}
ptNode->next->insert(make_pair(*citer, nextNode));
ptNode = nextNode;
} else {
ptNode = kmIter->second;
}
}
assert(ptNode != NULL);
ptNode->ptValue = ptValue;
}
TrieNode::NextMap::const_iterator kmIter;
TrieNode *ptNode = root_;
for(Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
if(NULL == ptNode->next) {
ptNode->next = new TrieNode::NextMap;
}
kmIter = ptNode->next->find(*citer);
if(ptNode->next->end() == kmIter) {
TrieNode *nextNode = new TrieNode;
private:
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
if (valuePointers.empty() || keys.empty()) {
return;
ptNode->next->insert(make_pair(*citer, nextNode));
ptNode = nextNode;
} else {
ptNode = kmIter->second;
}
}
assert(ptNode != NULL);
ptNode->ptValue = ptValue;
}
assert(keys.size() == valuePointers.size());
for (size_t i = 0; i < keys.size(); i++) {
InsertNode(keys[i], valuePointers[i]);
}
}
private:
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
if(valuePointers.empty() || keys.empty()) {
return;
}
assert(keys.size() == valuePointers.size());
void DeleteNode(TrieNode* node) {
if (NULL == node) {
return;
for(size_t i = 0; i < keys.size(); i++) {
InsertNode(keys[i], valuePointers[i]);
}
}
if (NULL != node->next) {
for (TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) {
DeleteNode(it->second);
}
delete node->next;
}
delete node;
}
TrieNode* root_;
// Recursively frees `node`, all nodes reachable through its child map, and
// the child map itself. A NULL `node` is ignored. The DictUnit pointed to by
// ptValue is not touched.
void DeleteNode(TrieNode* node) {
    if(node == NULL) {
        return;
    }
    TrieNode::NextMap *children = node->next;
    if(children != NULL) {
        TrieNode::NextMap::iterator child = children->begin();
        while(child != children->end()) {
            DeleteNode(child->second);
            ++child;
        }
        delete children;
    }
    delete node;
}
TrieNode* root_;
}; // class Trie
} // namespace cppjieba

View File

@ -34,40 +34,40 @@ using std::vector;
typedef uint32_t Rune;
struct Word {
string word;
uint32_t offset;
uint32_t unicode_offset;
uint32_t unicode_length;
Word(const string& w, uint32_t o)
: word(w), offset(o) {
}
Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
: word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
string word;
uint32_t offset;
uint32_t unicode_offset;
uint32_t unicode_length;
Word(const string& w, uint32_t o)
: word(w), offset(o) {
}
Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
: word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
}; // struct Word
inline std::ostream& operator << (std::ostream& os, const Word& w) {
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
}
struct RuneStr {
Rune rune;
uint32_t offset;
uint32_t len;
uint32_t unicode_offset;
uint32_t unicode_length;
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
}
RuneStr(Rune r, uint32_t o, uint32_t l)
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
}
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
Rune rune;
uint32_t offset;
uint32_t len;
uint32_t unicode_offset;
uint32_t unicode_length;
RuneStr(): rune(0), offset(0), len(0), unicode_offset(0), unicode_length(0) {
}
RuneStr(Rune r, uint32_t o, uint32_t l)
: rune(r), offset(o), len(l), unicode_offset(0), unicode_length(0) {
}
RuneStr(Rune r, uint32_t o, uint32_t l, uint32_t unicode_offset, uint32_t unicode_length)
: rune(r), offset(o), len(l), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
}; // struct RuneStr
inline std::ostream& operator << (std::ostream& os, const RuneStr& r) {
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
return os << "{\"rune\": \"" << r.rune << "\", \"offset\": " << r.offset << ", \"len\": " << r.len << "}";
}
typedef limonp::LocalVector<Rune> Unicode;
@ -75,169 +75,169 @@ typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
// [left, right]
struct WordRange {
RuneStrArray::const_iterator left;
RuneStrArray::const_iterator right;
WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
: left(l), right(r) {
}
size_t Length() const {
return right - left + 1;
}
bool IsAllAscii() const {
for (RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
if (iter->rune >= 0x80) {
return false;
}
RuneStrArray::const_iterator left;
RuneStrArray::const_iterator right;
WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
: left(l), right(r) {
}
size_t Length() const {
return right - left + 1;
}
bool IsAllAscii() const {
for(RuneStrArray::const_iterator iter = left; iter <= right; ++iter) {
if(iter->rune >= 0x80) {
return false;
}
}
return true;
}
return true;
}
}; // struct WordRange
struct RuneStrLite {
uint32_t rune;
uint32_t len;
RuneStrLite(): rune(0), len(0) {
}
RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) {
}
uint32_t rune;
uint32_t len;
RuneStrLite(): rune(0), len(0) {
}
RuneStrLite(uint32_t r, uint32_t l): rune(r), len(l) {
}
}; // struct RuneStrLite
inline RuneStrLite DecodeRuneInString(const char* str, size_t len) {
RuneStrLite rp(0, 0);
if (str == NULL || len == 0) {
RuneStrLite rp(0, 0);
if(str == NULL || len == 0) {
return rp;
}
if(!(str[0] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit
rp.rune = (uint8_t)(str[0]) & 0x7f;
rp.len = 1;
} else if((uint8_t)str[0] <= 0xdf && 1 < len) {
// 110xxxxxx
// 5bit, total 5bit
rp.rune = (uint8_t)(str[0]) & 0x1f;
// 6bit, total 11bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
rp.len = 2;
} else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx
// 4bit, total 4bit
rp.rune = (uint8_t)(str[0]) & 0x0f;
// 6bit, total 10bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
// 6bit, total 16bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f;
rp.len = 3;
} else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx
// 3bit, total 3bit
rp.rune = (uint8_t)(str[0]) & 0x07;
// 6bit, total 9bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
// 6bit, total 15bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f;
// 6bit, total 21bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[3]) & 0x3f;
rp.len = 4;
} else {
rp.rune = 0;
rp.len = 0;
}
return rp;
}
if (!(str[0] & 0x80)) { // 0xxxxxxx
// 7bit, total 7bit
rp.rune = (uint8_t)(str[0]) & 0x7f;
rp.len = 1;
} else if ((uint8_t)str[0] <= 0xdf && 1 < len) {
// 110xxxxxx
// 5bit, total 5bit
rp.rune = (uint8_t)(str[0]) & 0x1f;
// 6bit, total 11bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
rp.len = 2;
} else if((uint8_t)str[0] <= 0xef && 2 < len) { // 1110xxxxxx
// 4bit, total 4bit
rp.rune = (uint8_t)(str[0]) & 0x0f;
// 6bit, total 10bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
// 6bit, total 16bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f;
rp.len = 3;
} else if((uint8_t)str[0] <= 0xf7 && 3 < len) { // 11110xxxx
// 3bit, total 3bit
rp.rune = (uint8_t)(str[0]) & 0x07;
// 6bit, total 9bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[1]) & 0x3f;
// 6bit, total 15bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[2]) & 0x3f;
// 6bit, total 21bit
rp.rune <<= 6;
rp.rune |= (uint8_t)(str[3]) & 0x3f;
rp.len = 4;
} else {
rp.rune = 0;
rp.len = 0;
}
return rp;
}
inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes) {
runes.clear();
runes.reserve(len / 2);
for (uint32_t i = 0, j = 0; i < len;) {
RuneStrLite rp = DecodeRuneInString(s + i, len - i);
if (rp.len == 0) {
runes.clear();
return false;
runes.clear();
runes.reserve(len / 2);
for(uint32_t i = 0, j = 0; i < len;) {
RuneStrLite rp = DecodeRuneInString(s + i, len - i);
if(rp.len == 0) {
runes.clear();
return false;
}
RuneStr x(rp.rune, i, rp.len, j, 1);
runes.push_back(x);
i += rp.len;
++j;
}
RuneStr x(rp.rune, i, rp.len, j, 1);
runes.push_back(x);
i += rp.len;
++j;
}
return true;
return true;
}
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
return DecodeRunesInString(s.c_str(), s.size(), runes);
return DecodeRunesInString(s.c_str(), s.size(), runes);
}
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
unicode.clear();
RuneStrArray runes;
if (!DecodeRunesInString(s, len, runes)) {
return false;
}
unicode.reserve(runes.size());
for (size_t i = 0; i < runes.size(); i++) {
unicode.push_back(runes[i].rune);
}
return true;
unicode.clear();
RuneStrArray runes;
if(!DecodeRunesInString(s, len, runes)) {
return false;
}
unicode.reserve(runes.size());
for(size_t i = 0; i < runes.size(); i++) {
unicode.push_back(runes[i].rune);
}
return true;
}
inline bool IsSingleWord(const string& str) {
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
return rp.len == str.size();
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
return rp.len == str.size();
}
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
return DecodeRunesInString(s.c_str(), s.size(), unicode);
return DecodeRunesInString(s.c_str(), s.size(), unicode);
}
inline Unicode DecodeRunesInString(const string& s) {
Unicode result;
DecodeRunesInString(s, result);
return result;
Unicode result;
DecodeRunesInString(s, result);
return result;
}
// [left, right]
inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length);
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
uint32_t unicode_length = right->unicode_offset - left->unicode_offset + right->unicode_length;
return Word(s.substr(left->offset, len), left->offset, left->unicode_offset, unicode_length);
}
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, len);
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, len);
}
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
}
for(size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
}
}
inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
vector<Word> result;
GetWordsFromWordRanges(s, wrs, result);
return result;
vector<Word> result;
GetWordsFromWordRanges(s, wrs, result);
return result;
}
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
strs.resize(words.size());
for (size_t i = 0; i < words.size(); ++i) {
strs[i] = words[i].word;
}
strs.resize(words.size());
for(size_t i = 0; i < words.size(); ++i) {
strs[i] = words[i].word;
}
}
} // namespace cppjieba

View File

@ -33,54 +33,54 @@ namespace limonp {
using namespace std;
class ArgvContext {
public :
ArgvContext(int argc, const char* const * argv) {
for(int i = 0; i < argc; i++) {
if(StartsWith(argv[i], "-")) {
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
mpss_[argv[i]] = argv[i+1];
i++;
} else {
sset_.insert(argv[i]);
public :
ArgvContext(int argc, const char* const * argv) {
for(int i = 0; i < argc; i++) {
if(StartsWith(argv[i], "-")) {
if(i + 1 < argc && !StartsWith(argv[i + 1], "-")) {
mpss_[argv[i]] = argv[i + 1];
i++;
} else {
sset_.insert(argv[i]);
}
} else {
args_.push_back(argv[i]);
}
}
} else {
args_.push_back(argv[i]);
}
}
}
~ArgvContext() {
}
~ArgvContext() {
}
friend ostream& operator << (ostream& os, const ArgvContext& args);
string operator [](size_t i) const {
if(i < args_.size()) {
return args_[i];
friend ostream& operator << (ostream& os, const ArgvContext& args);
string operator [](size_t i) const {
if(i < args_.size()) {
return args_[i];
}
return "";
}
return "";
}
string operator [](const string& key) const {
map<string, string>::const_iterator it = mpss_.find(key);
if(it != mpss_.end()) {
return it->second;
string operator [](const string& key) const {
map<string, string>::const_iterator it = mpss_.find(key);
if(it != mpss_.end()) {
return it->second;
}
return "";
}
return "";
}
bool HasKey(const string& key) const {
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
return true;
bool HasKey(const string& key) const {
if(mpss_.find(key) != mpss_.end() || sset_.find(key) != sset_.end()) {
return true;
}
return false;
}
return false;
}
private:
vector<string> args_;
map<string, string> mpss_;
set<string> sset_;
private:
vector<string> args_;
map<string, string> mpss_;
set<string> sset_;
}; // class ArgvContext
// Streams the three argument buckets back to back: positional arguments,
// key/value pairs, then bare switches. Relies on operator<< overloads for
// vector, map and set being visible at this point.
inline ostream& operator << (ostream& os, const ArgvContext& args) {
    return os << args.args_ << args.mpss_ << args.sset_;
}
} // namespace limonp

View File

@ -25,41 +25,41 @@
namespace limonp {
template<class T>
class BlockingQueue: NonCopyable {
public:
BlockingQueue()
: mutex_(), notEmpty_(mutex_), queue_() {
}
void Push(const T& x) {
MutexLockGuard lock(mutex_);
queue_.push(x);
notEmpty_.Notify(); // Wait morphing saves us
}
T Pop() {
MutexLockGuard lock(mutex_);
// always use a while-loop, due to spurious wakeup
while (queue_.empty()) {
notEmpty_.Wait();
public:
BlockingQueue()
: mutex_(), notEmpty_(mutex_), queue_() {
}
assert(!queue_.empty());
T front(queue_.front());
queue_.pop();
return front;
}
size_t Size() const {
MutexLockGuard lock(mutex_);
return queue_.size();
}
bool Empty() const {
return Size() == 0;
}
void Push(const T& x) {
MutexLockGuard lock(mutex_);
queue_.push(x);
notEmpty_.Notify(); // Wait morphing saves us
}
private:
mutable MutexLock mutex_;
Condition notEmpty_;
std::queue<T> queue_;
T Pop() {
MutexLockGuard lock(mutex_);
// always use a while-loop, due to spurious wakeup
while(queue_.empty()) {
notEmpty_.Wait();
}
assert(!queue_.empty());
T front(queue_.front());
queue_.pop();
return front;
}
size_t Size() const {
MutexLockGuard lock(mutex_);
return queue_.size();
}
bool Empty() const {
return Size() == 0;
}
private:
mutable MutexLock mutex_;
Condition notEmpty_;
std::queue<T> queue_;
}; // class BlockingQueue
} // namespace limonp

View File

@ -25,59 +25,59 @@ namespace limonp {
template<typename T>
class BoundedBlockingQueue : NonCopyable {
public:
explicit BoundedBlockingQueue(size_t maxSize)
: mutex_(),
notEmpty_(mutex_),
notFull_(mutex_),
queue_(maxSize) {
}
void Push(const T& x) {
MutexLockGuard lock(mutex_);
while (queue_.Full()) {
notFull_.Wait();
public:
explicit BoundedBlockingQueue(size_t maxSize)
: mutex_(),
notEmpty_(mutex_),
notFull_(mutex_),
queue_(maxSize) {
}
assert(!queue_.Full());
queue_.Push(x);
notEmpty_.Notify();
}
T Pop() {
MutexLockGuard lock(mutex_);
while (queue_.Empty()) {
notEmpty_.Wait();
void Push(const T& x) {
MutexLockGuard lock(mutex_);
while(queue_.Full()) {
notFull_.Wait();
}
assert(!queue_.Full());
queue_.Push(x);
notEmpty_.Notify();
}
assert(!queue_.Empty());
T res = queue_.Pop();
notFull_.Notify();
return res;
}
bool Empty() const {
MutexLockGuard lock(mutex_);
return queue_.Empty();
}
T Pop() {
MutexLockGuard lock(mutex_);
while(queue_.Empty()) {
notEmpty_.Wait();
}
assert(!queue_.Empty());
T res = queue_.Pop();
notFull_.Notify();
return res;
}
bool Full() const {
MutexLockGuard lock(mutex_);
return queue_.Full();
}
bool Empty() const {
MutexLockGuard lock(mutex_);
return queue_.Empty();
}
size_t size() const {
MutexLockGuard lock(mutex_);
return queue_.size();
}
bool Full() const {
MutexLockGuard lock(mutex_);
return queue_.Full();
}
size_t capacity() const {
return queue_.capacity();
}
size_t size() const {
MutexLockGuard lock(mutex_);
return queue_.size();
}
private:
mutable MutexLock mutex_;
Condition notEmpty_;
Condition notFull_;
BoundedQueue<T> queue_;
size_t capacity() const {
return queue_.capacity();
}
private:
mutable MutexLock mutex_;
Condition notEmpty_;
Condition notFull_;
BoundedQueue<T> queue_;
}; // class BoundedBlockingQueue
} // namespace limonp

View File

@ -27,55 +27,55 @@ namespace limonp {
using namespace std;
// Fixed-capacity FIFO ring buffer. No locking is done here, and Push()/Pop()
// only assert (rather than handle) the full/empty conditions.
template<class T>
class BoundedQueue {
public:
    // `capacity` must be non-zero; all `capacity` slots are allocated up front.
    explicit BoundedQueue(size_t capacity)
        : head_(0), tail_(0), size_(0), capacity_(capacity), circular_buffer_(capacity) {
        assert(capacity_);
    }
    ~BoundedQueue() {
    }

    // Resets the queue to empty; stored elements are left in place and will be
    // overwritten by later pushes.
    void Clear() {
        head_ = 0;
        tail_ = 0;
        size_ = 0;
    }
    bool Empty() const {
        return !size_;
    }
    bool Full() const {
        return capacity_ == size_;
    }
    size_t Size() const {
        return size_;
    }
    size_t Capacity() const {
        return capacity_;
    }

    // Stores a copy of `t` at the tail. Precondition: !Full().
    void Push(const T& t) {
        assert(!Full());
        circular_buffer_[tail_] = t;
        tail_ = (tail_ + 1) % capacity_; // wrap around at the end of the buffer
        size_ ++;
    }

    // Returns a copy of the oldest element and drops it. Precondition: !Empty().
    T Pop() {
        assert(!Empty());
        size_t oldPos = head_;
        head_ = (head_ + 1) % capacity_;
        size_ --;
        return circular_buffer_[oldPos];
    }

private:
    size_t head_;   // index of the oldest element
    size_t tail_;   // index of the next free slot
    size_t size_;   // number of stored elements
    const size_t capacity_;
    std::vector<T> circular_buffer_;
}; // class BoundedQueue
} // namespace limonp

View File

@ -22,201 +22,201 @@
namespace limonp {
// Abstract base for the Closure*/ObjClosure* templates: a callable with its
// arguments already bound, invoked through Run().
class ClosureInterface {
public:
    virtual ~ClosureInterface() {
    }
    virtual void Run() = 0;
};
// Closure over a callable that takes no arguments; Run() evaluates (*fun_)().
template <class Funct>
class Closure0: public ClosureInterface {
public:
    // Members are initialised (not default-constructed and then assigned), so
    // Funct does not need a default constructor.
    Closure0(Funct fun) : fun_(fun) {
    }
    virtual ~Closure0() {
    }
    virtual void Run() {
        (*fun_)();
    }
private:
    Funct fun_;
};
// Closure over a callable plus one bound argument; Run() evaluates
// (*fun_)(arg1_).
template <class Funct, class Arg1>
class Closure1: public ClosureInterface {
public:
    // Members are initialised (not default-constructed and then assigned), so
    // neither Funct nor Arg1 needs a default constructor.
    Closure1(Funct fun, Arg1 arg1) : fun_(fun), arg1_(arg1) {
    }
    virtual ~Closure1() {
    }
    virtual void Run() {
        (*fun_)(arg1_);
    }
private:
    Funct fun_;
    Arg1 arg1_;
};
// Closure over a callable plus two bound arguments; Run() evaluates
// (*fun_)(arg1_, arg2_).
template <class Funct, class Arg1, class Arg2>
class Closure2: public ClosureInterface {
public:
    // Members are initialised (not default-constructed and then assigned), so
    // none of the stored types needs a default constructor.
    Closure2(Funct fun, Arg1 arg1, Arg2 arg2) : fun_(fun), arg1_(arg1), arg2_(arg2) {
    }
    virtual ~Closure2() {
    }
    virtual void Run() {
        (*fun_)(arg1_, arg2_);
    }
private:
    Funct fun_;
    Arg1 arg1_;
    Arg2 arg2_;
};
// Closure over a callable plus three bound arguments; Run() evaluates
// (*fun_)(arg1_, arg2_, arg3_).
template <class Funct, class Arg1, class Arg2, class Arg3>
class Closure3: public ClosureInterface {
public:
    // Members are initialised (not default-constructed and then assigned), so
    // none of the stored types needs a default constructor.
    Closure3(Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
        : fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
    }
    virtual ~Closure3() {
    }
    virtual void Run() {
        (*fun_)(arg1_, arg2_, arg3_);
    }
private:
    Funct fun_;
    Arg1 arg1_;
    Arg2 arg2_;
    Arg3 arg3_;
};
template <class Obj, class Funct>
template <class Obj, class Funct>
class ObjClosure0: public ClosureInterface {
public:
ObjClosure0(Obj* p, Funct fun) {
p_ = p;
fun_ = fun;
}
virtual ~ObjClosure0() {
}
virtual void Run() {
(p_->*fun_)();
}
private:
Obj* p_;
Funct fun_;
};
public:
ObjClosure0(Obj* p, Funct fun) {
p_ = p;
fun_ = fun;
}
virtual ~ObjClosure0() {
}
virtual void Run() {
(p_->*fun_)();
}
private:
Obj* p_;
Funct fun_;
};
template <class Obj, class Funct, class Arg1>
template <class Obj, class Funct, class Arg1>
class ObjClosure1: public ClosureInterface {
public:
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
}
virtual ~ObjClosure1() {
}
virtual void Run() {
(p_->*fun_)(arg1_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
};
public:
ObjClosure1(Obj* p, Funct fun, Arg1 arg1) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
}
virtual ~ObjClosure1() {
}
virtual void Run() {
(p_->*fun_)(arg1_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
};
template <class Obj, class Funct, class Arg1, class Arg2>
template <class Obj, class Funct, class Arg1, class Arg2>
class ObjClosure2: public ClosureInterface {
public:
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
}
virtual ~ObjClosure2() {
}
virtual void Run() {
(p_->*fun_)(arg1_, arg2_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
};
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
public:
ObjClosure2(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2) {
p_ = p;
fun_ = fun;
arg1_ = arg1;
arg2_ = arg2;
}
virtual ~ObjClosure2() {
}
virtual void Run() {
(p_->*fun_)(arg1_, arg2_);
}
private:
Obj* p_;
Funct fun_;
Arg1 arg1_;
Arg2 arg2_;
};
// Closure over an object pointer, a member-function pointer and three bound
// arguments; Run() evaluates (p_->*fun_)(arg1_, arg2_, arg3_). The object is
// not owned here: only the raw pointer is stored, so it must still be alive
// when Run() is called.
template <class Obj, class Funct, class Arg1, class Arg2, class Arg3>
class ObjClosure3: public ClosureInterface {
public:
    ObjClosure3(Obj* p, Funct fun, Arg1 arg1, Arg2 arg2, Arg3 arg3)
        : p_(p), fun_(fun), arg1_(arg1), arg2_(arg2), arg3_(arg3) {
    }
    virtual ~ObjClosure3() {
    }
    virtual void Run() {
        (p_->*fun_)(arg1_, arg2_, arg3_);
    }
private:
    Obj* p_;
    Funct fun_;
    Arg1 arg1_;
    Arg2 arg2_;
    Arg3 arg3_;
};
// Wraps a free function taking no arguments. The closure is created with `new`
// and is neither stored nor freed here; the recipient must delete it.
template<class R>
ClosureInterface* NewClosure(R(*fun)()) {
    return new Closure0<R(*)()>(fun);
}
// Wraps a one-argument free function together with its argument. The closure
// is created with `new` and is neither stored nor freed here; the recipient
// must delete it.
template<class R, class Arg1>
ClosureInterface* NewClosure(R(*fun)(Arg1), Arg1 arg1) {
    return new Closure1<R(*)(Arg1), Arg1>(fun, arg1);
}
// Wraps a two-argument free function together with its arguments. The closure
// is created with `new` and is neither stored nor freed here; the recipient
// must delete it.
template<class R, class Arg1, class Arg2>
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
    return new Closure2<R(*)(Arg1, Arg2), Arg1, Arg2>(fun, arg1, arg2);
}
// Wraps a three-argument free function together with its arguments. The
// closure is created with `new` and is neither stored nor freed here; the
// recipient must delete it.
template<class R, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(R(*fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
    return new Closure3<R(*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(fun, arg1, arg2, arg3);
}
// Wraps a no-argument (non-const) member function bound to `obj`. Only the raw
// pointer to `obj` is kept. The closure is created with `new` and is neither
// stored nor freed here; the recipient must delete it.
template<class R, class Obj>
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)()) {
    return new ObjClosure0<Obj, R(Obj::*)()>(obj, fun);
}
// Wraps a one-argument (non-const) member function bound to `obj`, together
// with its argument. Only the raw pointer to `obj` is kept. The closure is
// created with `new` and is neither stored nor freed here; the recipient must
// delete it.
template<class R, class Obj, class Arg1>
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1), Arg1 arg1) {
    return new ObjClosure1<Obj, R(Obj::*)(Arg1), Arg1>(obj, fun, arg1);
}
// Wraps a two-argument (non-const) member function bound to `obj`, together
// with its arguments. Only the raw pointer to `obj` is kept. The closure is
// created with `new` and is neither stored nor freed here; the recipient must
// delete it.
template<class R, class Obj, class Arg1, class Arg2>
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2), Arg1 arg1, Arg2 arg2) {
    return new ObjClosure2<Obj, R(Obj::*)(Arg1, Arg2), Arg1, Arg2>(obj, fun, arg1, arg2);
}
// Wraps a three-argument (non-const) member function bound to `obj`, together
// with its arguments. Only the raw pointer to `obj` is kept. The closure is
// created with `new` and is neither stored nor freed here; the recipient must
// delete it.
template<class R, class Obj, class Arg1, class Arg2, class Arg3>
ClosureInterface* NewClosure(Obj* obj, R(Obj::* fun)(Arg1, Arg2, Arg3), Arg1 arg1, Arg2 arg2, Arg3 arg3) {
    return new ObjClosure3<Obj, R(Obj::*)(Arg1, Arg2, Arg3), Arg1, Arg2, Arg3>(obj, fun, arg1, arg2, arg3);
}
} // namespace limonp

View File

@ -27,21 +27,21 @@ namespace limonp {
using std::string;
// Colour codes as printed by ColorPrintln into the "\033[0;%dm" escape
// sequence. Values run consecutively from BLACK = 30 to PURPLE = 35.
enum Color {
    BLACK = 30,
    RED,
    GREEN,
    YELLOW,
    BLUE,
    PURPLE
}; // enum Color
// printf-style helper that writes one coloured line to stdout: emits the
// "\033[0;<color>m" escape, the formatted message, then the "\033[0m" reset
// followed by a newline.
static void ColorPrintln(enum Color color, const char * fmt, ...) {
    va_list ap;
    printf("\033[0;%dm", color);
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);
    printf("\033[0m\n"); // if not \n , in some situation , the next lines will be set the same color unexpectedly
}
} // namespace limonp

View File

@ -24,31 +24,31 @@
namespace limonp {
// Wrapper around a pthread condition variable tied to one MutexLock. Every
// pthread call is wrapped in XCHECK, which logs at FATAL level when the call
// returns non-zero.
class Condition : NonCopyable {
public:
    // Keeps a reference to `mutex`; it must outlive this object.
    explicit Condition(MutexLock& mutex)
        : mutex_(mutex) {
        XCHECK(!pthread_cond_init(&pcond_, NULL));
    }

    ~Condition() {
        XCHECK(!pthread_cond_destroy(&pcond_));
    }

    // Blocks on the condition. pthread_cond_wait requires the caller to hold
    // the associated mutex, and wakeups may be spurious, so callers re-check
    // their predicate in a loop.
    void Wait() {
        XCHECK(!pthread_cond_wait(&pcond_, mutex_.GetPthreadMutex()));
    }

    // Wakes one waiter (pthread_cond_signal).
    void Notify() {
        XCHECK(!pthread_cond_signal(&pcond_));
    }

    // Wakes all waiters (pthread_cond_broadcast).
    void NotifyAll() {
        XCHECK(!pthread_cond_broadcast(&pcond_));
    }

private:
    MutexLock& mutex_;
    pthread_cond_t pcond_;
}; // class Condition
} // namespace limonp

View File

@ -34,86 +34,86 @@ namespace limonp {
using namespace std;
class Config {
public:
explicit Config(const string& filePath) {
LoadFile(filePath);
}
operator bool () {
return !map_.empty();
}
string Get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second;
public:
explicit Config(const string& filePath) {
LoadFile(filePath);
}
return defaultvalue;
}
int Get(const string& key, int defaultvalue) const {
string str = Get(key, "");
if("" == str) {
return defaultvalue;
}
return atoi(str.c_str());
}
const char* operator [] (const char* key) const {
if(NULL == key) {
return NULL;
}
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second.c_str();
}
return NULL;
}
string GetConfigInfo() const {
string res;
res << *this;
return res;
}
private:
void LoadFile(const string& filePath) {
ifstream ifs(filePath.c_str());
assert(ifs);
string line;
vector<string> vecBuf;
size_t lineno = 0;
while(getline(ifs, line)) {
lineno ++;
Trim(line);
if(line.empty() || StartsWith(line, "#")) {
continue;
}
vecBuf.clear();
Split(line, vecBuf, "=");
if(2 != vecBuf.size()) {
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
assert(false);
continue;
}
string& key = vecBuf[0];
string& value = vecBuf[1];
Trim(key);
Trim(value);
if(!map_.insert(make_pair(key, value)).second) {
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
assert(false);
continue;
}
operator bool () {
return !map_.empty();
}
ifs.close();
}
friend ostream& operator << (ostream& os, const Config& config);
string Get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second;
}
return defaultvalue;
}
int Get(const string& key, int defaultvalue) const {
string str = Get(key, "");
if("" == str) {
return defaultvalue;
}
return atoi(str.c_str());
}
const char* operator [](const char* key) const {
if(NULL == key) {
return NULL;
}
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
return it->second.c_str();
}
return NULL;
}
map<string, string> map_;
string GetConfigInfo() const {
string res;
res << *this;
return res;
}
private:
void LoadFile(const string& filePath) {
ifstream ifs(filePath.c_str());
assert(ifs);
string line;
vector<string> vecBuf;
size_t lineno = 0;
while(getline(ifs, line)) {
lineno ++;
Trim(line);
if(line.empty() || StartsWith(line, "#")) {
continue;
}
vecBuf.clear();
Split(line, vecBuf, "=");
if(2 != vecBuf.size()) {
fprintf(stderr, "line[%s] illegal.\n", line.c_str());
assert(false);
continue;
}
string& key = vecBuf[0];
string& value = vecBuf[1];
Trim(key);
Trim(value);
if(!map_.insert(make_pair(key, value)).second) {
fprintf(stderr, "key[%s] already exits.\n", key.c_str());
assert(false);
continue;
}
}
ifs.close();
}
friend ostream& operator << (ostream& os, const Config& config);
map<string, string> map_;
}; // class Config
// Streams the configuration's key/value map; relies on an operator<< overload
// for map<string, string> being visible at this point.
inline ostream& operator << (ostream& os, const Config& config) {
    return os << config.map_;
}
} // namespace limonp

View File

@ -33,58 +33,58 @@ namespace limonp {
using std::string;
// Advisory whole-file lock built on fcntl(F_SETLK).
//
// Errors are not thrown: a failing Open/Lock/UnLock clears Ok() and stores the
// strerror text, retrievable through Error().
// NOTE(review): the class is copyable, and a copy would share (and later
// close) the same descriptor -- avoid copying instances.
class FileLock {
public:
    FileLock() : fd_(-1), ok_(true) {
    }
    ~FileLock() {
        // 0 is a valid descriptor, so test for ">= 0" rather than "> 0".
        if(fd_ >= 0) {
            Close();
        }
    }
    // Opens (creating if necessary, mode 0644) the file to lock. Must not be
    // called while a file is already open.
    void Open(const std::string& fname) {
        assert(fd_ == -1);
        fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
        if(fd_ < 0) {
            ok_ = false;
            err_ = strerror(errno);
        }
    }
    // Closes the descriptor, if any, and forgets it so that neither a second
    // Close() nor the destructor closes it again, and Open() may be reused.
    void Close() {
        if(fd_ < 0) {
            return;
        }
        ::close(fd_);
        fd_ = -1;
    }
    // Tries to take an exclusive (write) lock on the whole file. F_SETLK does
    // not wait: if the lock cannot be acquired the call fails and Ok() turns
    // false.
    void Lock() {
        if(LockOrUnlock(fd_, true) < 0) {
            ok_ = false;
            err_ = strerror(errno);
        }
    }
    // Releases the lock taken by Lock().
    void UnLock() {
        if(LockOrUnlock(fd_, false) < 0) {
            ok_ = false;
            err_ = strerror(errno);
        }
    }
    // False once any operation has failed; never reset to true afterwards.
    bool Ok() const {
        return ok_;
    }
    // Text of the most recent failure ("" if none).
    std::string Error() const {
        return err_;
    }
private:
    // Returns the fcntl result: -1 (with errno set) on failure.
    static int LockOrUnlock(int fd, bool lock) {
        errno = 0;
        struct flock f;
        memset(&f, 0, sizeof(f));
        f.l_type = (lock ? F_WRLCK : F_UNLCK);
        f.l_whence = SEEK_SET;
        f.l_start = 0;
        f.l_len = 0; // Lock/unlock entire file
        return fcntl(fd, F_SETLK, &f);
    }

    int fd_;            // -1 while no file is open
    bool ok_;
    std::string err_;
}; // class FileLock
}// namespace limonp

View File

@ -33,123 +33,123 @@ using namespace std;
// Number of elements LocalVector keeps inline before moving to the heap.
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;

// Small-buffer vector: the first LOCAL_VECTOR_BUFFER_SIZE elements live in an
// inline array; beyond that, storage comes from malloc.
//
// Elements are relocated and copied with memcpy and never destroyed
// individually, so T is assumed to be trivially copyable (presumably small
// POD-like types -- confirm against users before storing anything else).
template <class T>
class LocalVector {
public:
    typedef const T* const_iterator ;
    typedef T value_type;
    typedef size_t size_type;
private:
    T buffer_[LOCAL_VECTOR_BUFFER_SIZE];
    T * ptr_;           // buffer_ while inline, otherwise a malloc'ed block
    size_t size_;
    size_t capacity_;
public:
    LocalVector() {
        init_();
    }
    LocalVector(const LocalVector<T>& vec) {
        init_();
        *this = vec;
    }
    LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
        init_();
        while(begin != end) {
            push_back(*begin++);
        }
    }
    LocalVector(size_t size, const T& t) { // TODO: make it faster
        init_();
        while(size--) {
            push_back(t);
        }
    }
    ~LocalVector() {
        if(ptr_ != buffer_) {
            free(ptr_);
        }
    }
public:
    // Replaces the contents with a copy of `vec` (same size and capacity).
    LocalVector<T>& operator = (const LocalVector<T>& vec) {
        // clear() below would wipe the source when assigning to ourselves.
        if(this == &vec) {
            return *this;
        }
        clear();
        size_ = vec.size();
        capacity_ = vec.capacity();
        if(vec.buffer_ == vec.ptr_) {
            memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
            ptr_ = buffer_;
        } else {
            ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
            assert(ptr_);
            memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
        }
        return *this;
    }
private:
    // Points the vector at its inline buffer and marks it empty. Does not free
    // heap storage; callers do that first.
    void init_() {
        ptr_ = buffer_;
        size_ = 0;
        capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
    }
public:
    // Unchecked element access.
    T& operator [](size_t i) {
        return ptr_[i];
    }
    const T& operator [](size_t i) const {
        return ptr_[i];
    }
    // Appends a copy of `t`, doubling the capacity when full.
    void push_back(const T& t) {
        if(size_ == capacity_) {
            assert(capacity_);
            reserve(capacity_ * 2);
        }
        ptr_[size_ ++ ] = t;
    }
    // Grows the capacity to `size` elements; a no-op when already that large.
    void reserve(size_t size) {
        if(size <= capacity_) {
            return;
        }
        T * next = (T*)malloc(sizeof(T) * size);
        assert(next);
        T * old = ptr_;
        ptr_ = next;
        memcpy(ptr_, old, sizeof(T) * capacity_);
        capacity_ = size;
        if(old != buffer_) {
            free(old);
        }
    }
    bool empty() const {
        return 0 == size();
    }
    size_t size() const {
        return size_;
    }
    size_t capacity() const {
        return capacity_;
    }
    const_iterator begin() const {
        return ptr_;
    }
    const_iterator end() const {
        return ptr_ + size_;
    }
    // Drops all elements, releases any heap block and returns to the inline
    // buffer.
    void clear() {
        if(ptr_ != buffer_) {
            free(ptr_);
        }
        init_();
    }
};
// Streams a LocalVector as a bracketed list with every element wrapped in
// double quotes, e.g. ["a", "b"]; an empty vector prints as [].
template <class T>
ostream & operator << (ostream& os, const LocalVector<T>& vec) {
    if(vec.empty()) {
        return os << "[]";
    }
    os << "[\"" << vec[0];
    for(size_t i = 1; i < vec.size(); i++) {
        os << "\", \"" << vec[i];
    }
    os << "\"]";
    return os;
}
}

View File

@ -32,61 +32,61 @@
#error "XCHECK has been defined already"
#endif // XCHECK
// XLOG(LEVEL) << ...;  builds a temporary limonp::Logger for the current
// file/line and yields its stream; the message is emitted when the temporary
// is destroyed at the end of the full expression.
#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
// XCHECK(exp) << ...;  logs at FATAL level when `exp` is false.
// Written as `if(exp) {} else ...` rather than `if(!(exp)) ...` so that an
// `else` following an XCHECK in caller code cannot attach to the macro's `if`.
#define XCHECK(exp) if(exp) {} else XLOG(FATAL) << "exp: ["#exp << "] false. "
namespace limonp {
// Log severities in increasing order. The numeric values double as indices
// into LOG_LEVEL_ARRAY, and XLOG(level) pastes the suffix after "LL_".
enum {
    LL_DEBUG = 0,
    LL_INFO = 1,
    LL_WARNING = 2,
    LL_ERROR = 3,
    LL_FATAL = 4,
}; // enum
// Printable name for each log level, indexed by the LL_* value.
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG", "INFO", "WARN", "ERROR", "FATAL"};
// strftime pattern for the timestamp that prefixes every log line.
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
class Logger {
public:
Logger(size_t level, const char* filename, int lineno)
: level_(level) {
public:
Logger(size_t level, const char* filename, int lineno)
: level_(level) {
#ifdef LOGGING_LEVEL
if (level_ < LOGGING_LEVEL) {
return;
}
if(level_ < LOGGING_LEVEL) {
return;
}
#endif
assert(level_ <= sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
char buf[32];
time_t now;
time(&now);
strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now));
stream_ << buf
<< " " << filename
<< ":" << lineno
<< " " << LOG_LEVEL_ARRAY[level_]
<< " ";
}
~Logger() {
#ifdef LOGGING_LEVEL
if (level_ < LOGGING_LEVEL) {
return;
}
#endif
std::cerr << stream_.str() << std::endl;
if (level_ == LL_FATAL) {
abort();
assert(level_ <= sizeof(LOG_LEVEL_ARRAY) / sizeof(*LOG_LEVEL_ARRAY));
char buf[32];
time_t now;
time(&now);
strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&now));
stream_ << buf
<< " " << filename
<< ":" << lineno
<< " " << LOG_LEVEL_ARRAY[level_]
<< " ";
}
~Logger() {
#ifdef LOGGING_LEVEL
if(level_ < LOGGING_LEVEL) {
return;
}
#endif
std::cerr << stream_.str() << std::endl;
if(level_ == LL_FATAL) {
abort();
}
}
}
std::ostream& Stream() {
return stream_;
}
std::ostream& Stream() {
return stream_;
}
private:
std::ostringstream stream_;
size_t level_;
private:
std::ostringstream stream_;
size_t level_;
}; // class Logger
} // namespace limonp

View File

@ -103,313 +103,313 @@ typedef unsigned short int UINT2;
typedef unsigned int UINT4;
// 64-byte padding block: a single 0x80 byte followed by 63 zero bytes.
// Only the first element is spelled out; the remaining elements of an
// aggregate initialiser are zero-initialised.
static unsigned char PADDING[64] = {
    0x80
};
// convenient object that wraps
// the C-functions for use in C++ only
class MD5 {
private:
struct __context_t {
UINT4 state[4]; /* state (ABCD) */
UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
unsigned char buffer[64]; /* input buffer */
} context ;
private:
struct __context_t {
UINT4 state[4]; /* state (ABCD) */
UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
unsigned char buffer[64]; /* input buffer */
} context ;
//#pragma region static helper functions
// The core of the MD5 algorithm is here.
// MD5 basic transformation. Transforms state based on block.
static void MD5Transform( UINT4 state[4], unsigned char block[64] ) {
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
//#pragma region static helper functions
// The core of the MD5 algorithm is here.
// MD5 basic transformation. Transforms state based on block.
static void MD5Transform(UINT4 state[4], unsigned char block[64]) {
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
Decode (x, block, 64);
Decode(x, block, 64);
/* Round 1 */
FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 1 */
FF(a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
FF(d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
FF(c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
FF(b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
FF(a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
FF(d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
FF(c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
FF(b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
FF(a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
FF(d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 2 */
GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 2 */
GG(a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
GG(d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
GG(a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
GG(a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG(c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
GG(b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
GG(c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 3 */
HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
/* Round 3 */
HH(a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
HH(d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH(a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH(d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
HH(c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
HH(b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
HH(a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH(b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
/* Round 4 */
II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
/* Round 4 */
II(a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
II(d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II(b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II(d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II(b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
II(a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II(c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II(a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
II(b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
// Zeroize sensitive information.
memset((POINTER)x, 0, sizeof (x));
}
// Encodes input (UINT4) into output (unsigned char). Assumes len is
// a multiple of 4.
static void Encode( unsigned char *output, UINT4 *input, unsigned int len ) {
unsigned int i, j;
for (i = 0, j = 0; j < len; i++, j += 4) {
output[j] = (unsigned char)(input[i] & 0xff);
output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
// Zeroize sensitive information.
memset((POINTER)x, 0, sizeof(x));
}
}
// Decodes input (unsigned char) into output (UINT4). Assumes len is
// a multiple of 4.
static void Decode( UINT4 *output, unsigned char *input, unsigned int len ) {
unsigned int i, j;
// Encodes input (UINT4) into output (unsigned char). Assumes len is
// a multiple of 4.
static void Encode(unsigned char *output, UINT4 *input, unsigned int len) {
unsigned int i, j;
for (i = 0, j = 0; j < len; i++, j += 4)
output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
(((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
}
//#pragma endregion
public:
// MAIN FUNCTIONS
// Default constructor: delegates to Init() so the object can be used for
// hashing immediately after construction.
MD5() {
Init() ;
}
// Resets the hashing context so that a new MD5 computation can begin.
void Init() {
    // No message bits have been processed yet.
    context.count[0] = 0;
    context.count[1] = 0;
    // Magic initialization constants for the four state words.
    context.state[0] = 0x67452301;
    context.state[1] = 0xefcdab89;
    context.state[2] = 0x98badcfe;
    context.state[3] = 0x10325476;
}
// MD5 block update operation. Continues an MD5 message-digest
// operation, processing another message block, and updating the
// context.
// Input that does not complete a 64-byte block is kept in context.buffer
// until a later call supplies the remaining bytes.
void Update(
unsigned char *input, // input block
unsigned int inputLen ) { // length of input block
unsigned int i, index, partLen;
// Compute number of bytes mod 64
index = (unsigned int)((context.count[0] >> 3) & 0x3F);
// Update number of bits
// count[0]/count[1] form a two-word bit counter: if the low word ends up
// smaller than the value just added it wrapped, so carry into the high word.
if ((context.count[0] += ((UINT4)inputLen << 3))
< ((UINT4)inputLen << 3))
context.count[1]++;
// Bits of inputLen that "<< 3" pushed out of the low word belong to the
// high word (the ">> 29" assumes UINT4 is 32 bits wide).
context.count[1] += ((UINT4)inputLen >> 29);
// Number of bytes still needed to fill the partially buffered block.
partLen = 64 - index;
// Transform as many times as possible.
if (inputLen >= partLen) {
// Complete the buffered block first, then hash it.
memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
MD5Transform (context.state, context.buffer);
// Hash every further whole 64-byte block directly from the input.
for (i = partLen; i + 63 < inputLen; i += 64)
MD5Transform (context.state, &input[i]);
index = 0;
} else
i = 0;
/* Buffer remaining input */
memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen-i);
}
// MD5 finalization. Ends an MD5 message-digest operation, writing the
// message digest and zeroizing the context.
// Writes the 16 raw digest bytes to digestRaw and then calls
// writeToString() to produce the hexadecimal form.
void Final() {
unsigned char bits[8];
unsigned int index, padLen;
// Save number of bits
// (taken before padding, because the Update() calls below advance count)
Encode( bits, context.count, 8 );
// Pad out to 56 mod 64.
index = (unsigned int)((context.count[0] >> 3) & 0x3f);
// Always pads at least one byte: 56 - index when the length field still
// fits in the current block, otherwise enough to reach offset 56 of the next.
padLen = (index < 56) ? (56 - index) : (120 - index);
Update( PADDING, padLen );
// Append length (before padding)
Update( bits, 8 );
// Store state in digest
Encode( digestRaw, context.state, 16);
// Zeroize sensitive information.
memset((POINTER)&context, 0, sizeof (context));
writeToString() ;
}
/// Renders the 16 bytes of digestRaw as lowercase hexadecimal text in
/// digestChars (32 digits followed by a terminating NUL).
void writeToString() {
    for (int pos = 0; pos < 16; pos++) {
        // Two hex digits per byte. snprintf is limited to the space still
        // left in digestChars, so the write cannot run past the array.
        const size_t offset = (size_t)pos * 2;
        snprintf(digestChars + offset, sizeof(digestChars) - offset, "%02x", digestRaw[pos]);
    }
}
public:
// an MD5 digest is a 16-byte number (32 hex digits)
BYTE digestRaw[ 16 ] ;
// This version of the digest is actually
// a "printf'd" version of the digest.
char digestChars[ 33 ] ;
/// Load a file from disk and digest it
// Digests a file and returns the result.
const char* digestFile( const char *filename ) {
if (NULL == filename || strcmp(filename, "") == 0)
return NULL;
Init() ;
FILE *file;
unsigned char buffer[1024] ;
if((file = fopen (filename, "rb")) == NULL) {
return NULL;
for(i = 0, j = 0; j < len; i++, j += 4) {
output[j] = (unsigned char)(input[i] & 0xff);
output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
}
}
int len;
while( (len = fread( buffer, 1, 1024, file )) )
Update( buffer, len ) ;
Final();
fclose( file );
// Decodes input (unsigned char) into output (UINT4). Assumes len is
// a multiple of 4.
static void Decode(UINT4 *output, unsigned char *input, unsigned int len) {
unsigned int i, j;
return digestChars ;
}
for(i = 0, j = 0; j < len; i++, j += 4)
output[i] = ((UINT4)input[j]) | (((UINT4)input[j + 1]) << 8) |
(((UINT4)input[j + 2]) << 16) | (((UINT4)input[j + 3]) << 24);
}
//#pragma endregion
/// Digests a byte-array already in memory
const char* digestMemory( BYTE *memchunk, int len ) {
if (NULL == memchunk)
return NULL;
Init() ;
Update( memchunk, len ) ;
Final() ;
public:
// MAIN FUNCTIONS
MD5() {
Init() ;
}
return digestChars ;
}
// MD5 initialization. Begins an MD5 operation, writing a new context.
void Init() {
context.count[0] = context.count[1] = 0;
// Digests a string and prints the result.
const char* digestString(const char *string ) {
if (string == NULL)
return NULL;
// Load magic initialization constants.
context.state[0] = 0x67452301;
context.state[1] = 0xefcdab89;
context.state[2] = 0x98badcfe;
context.state[3] = 0x10325476;
}
Init() ;
Update( (unsigned char*)string, strlen(string) ) ;
Final() ;
// MD5 block update operation. Continues an MD5 message-digest
// operation, processing another message block, and updating the
// context.
void Update(
unsigned char *input, // input block
unsigned int inputLen) { // length of input block
unsigned int i, index, partLen;
return digestChars ;
}
// Compute number of bytes mod 64
index = (unsigned int)((context.count[0] >> 3) & 0x3F);
// Update number of bits
if((context.count[0] += ((UINT4)inputLen << 3))
< ((UINT4)inputLen << 3))
context.count[1]++;
context.count[1] += ((UINT4)inputLen >> 29);
partLen = 64 - index;
// Transform as many times as possible.
if(inputLen >= partLen) {
memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
MD5Transform(context.state, context.buffer);
for(i = partLen; i + 63 < inputLen; i += 64)
MD5Transform(context.state, &input[i]);
index = 0;
} else
i = 0;
/* Buffer remaining input */
memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen - i);
}
// MD5 finalization. Ends an MD5 message-digest operation, writing the
// message digest and zeroizing the context.
// Writes the 16 raw digest bytes to digestRaw and then calls
// writeToString() to produce the hexadecimal form.
void Final() {
unsigned char bits[8];
unsigned int index, padLen;
// Save number of bits
// (taken before padding, because the Update() calls below advance count)
Encode(bits, context.count, 8);
// Pad out to 56 mod 64.
index = (unsigned int)((context.count[0] >> 3) & 0x3f);
// Always pads at least one byte: 56 - index when the length field still
// fits in the current block, otherwise enough to reach offset 56 of the next.
padLen = (index < 56) ? (56 - index) : (120 - index);
Update(PADDING, padLen);
// Append length (before padding)
Update(bits, 8);
// Store state in digest
Encode(digestRaw, context.state, 16);
// Zeroize sensitive information.
memset((POINTER)&context, 0, sizeof(context));
writeToString() ;
}
/// Renders the 16 bytes of digestRaw as lowercase hexadecimal text in
/// digestChars (32 digits followed by a terminating NUL).
void writeToString() {
    for (int pos = 0; pos < 16; pos++) {
        // Two hex digits per byte. snprintf is limited to the space still
        // left in digestChars, so the write cannot run past the array.
        const size_t offset = (size_t)pos * 2;
        snprintf(digestChars + offset, sizeof(digestChars) - offset, "%02x", digestRaw[pos]);
    }
}
public:
// an MD5 digest is a 16-byte number (32 hex digits)
BYTE digestRaw[ 16 ] ;
// This version of the digest is actually
// a "printf'd" version of the digest.
char digestChars[ 33 ] ;
/// Reads a file from disk in binary mode and digests its contents.
/// Returns digestChars (the hexadecimal digest) on success, or NULL when
/// filename is NULL/empty, the file cannot be opened, or a read error occurs.
const char* digestFile(const char *filename) {
    if (NULL == filename || strcmp(filename, "") == 0)
        return NULL;
    Init();
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        return NULL;
    }
    unsigned char buffer[1024];
    // fread returns size_t; keep that type instead of narrowing to int.
    size_t len;
    while ((len = fread(buffer, 1, sizeof(buffer), file)) != 0)
        Update(buffer, (unsigned int)len);
    // A short read can mean an I/O error rather than end of file; do not
    // report a digest of partial data as if it were the file's digest.
    const bool readFailed = ferror(file) != 0;
    fclose(file);
    if (readFailed)
        return NULL;
    Final();
    return digestChars;
}
/// Digests a byte array already in memory.
/// Returns digestChars (the hexadecimal digest), or NULL when memchunk is
/// NULL or len is negative.
const char* digestMemory(BYTE *memchunk, int len) {
    // Update() takes an unsigned length; a negative len would otherwise be
    // converted to a huge value and read far past the buffer.
    if (NULL == memchunk || len < 0)
        return NULL;
    Init();
    Update(memchunk, (unsigned int)len);
    Final();
    return digestChars;
}
// Digests a NUL-terminated string (the terminator itself is not hashed).
// Returns digestChars, or NULL when the argument is NULL.
const char* digestString(const char *string) {
    if (NULL != string) {
        Init();
        Update((unsigned char*)string, strlen(string));
        Final();
        return digestChars;
    }
    return NULL;
}
};
/// Computes the MD5 digest of a NUL-terminated string.
/// On success stores the hexadecimal digest in res and returns true.
/// When str is NULL or the digest call returns NULL, res is set to the
/// empty string and false is returned.
inline bool md5String(const char* str, std::string& res) {
    if (NULL == str) {
        res = "";
        return false;
    }
    MD5 md5;
    // pRes points into md5, so it must be copied before md5 goes out of scope.
    const char *pRes = md5.digestString(str);
    if (NULL == pRes) {
        res = "";
        return false;
    }
    res = pRes;
    return true;
}
/// Computes the MD5 digest of the file at filepath.
/// On success stores the hexadecimal digest in res and returns true.
/// When filepath is NULL/empty or the digest call returns NULL, res is set
/// to the empty string and false is returned.
inline bool md5File(const char* filepath, std::string& res) {
    if (NULL == filepath || strcmp(filepath, "") == 0) {
        res = "";
        return false;
    }
    MD5 md5;
    // pRes points into md5, so it must be copied before md5 goes out of scope.
    const char *pRes = md5.digestFile(filepath);
    if (NULL == pRes) {
        res = "";
        return false;
    }
    res = pRes;
    return true;
}
}
#endif

View File

@ -26,40 +26,40 @@
namespace limonp {
// Thin owner of a pthread mutex: initialised in the constructor and
// destroyed in the destructor. Lock()/Unlock() are private and reachable
// only through the befriended MutexLockGuard.
class MutexLock: NonCopyable {
public:
    MutexLock() {
        XCHECK(!pthread_mutex_init(&mutex_, NULL));
    }
    ~MutexLock() {
        XCHECK(!pthread_mutex_destroy(&mutex_));
    }
    // Exposes the underlying handle for APIs that take a pthread_mutex_t*.
    pthread_mutex_t* GetPthreadMutex() {
        return &mutex_;
    }

private:
    void Lock() {
        XCHECK(!pthread_mutex_lock(&mutex_));
    }
    void Unlock() {
        XCHECK(!pthread_mutex_unlock(&mutex_));
    }
    friend class MutexLockGuard;

    pthread_mutex_t mutex_;
}; // class MutexLock
// Scoped lock: acquires the given MutexLock on construction and releases
// it on destruction.
class MutexLockGuard: NonCopyable {
public:
    explicit MutexLockGuard(MutexLock & mutex)
        : mutex_(mutex) {
        mutex_.Lock();
    }
    ~MutexLockGuard() {
        mutex_.Unlock();
    }
private:
    MutexLock & mutex_;
}; // class MutexLockGuard
#define MutexLockGuard(x) XCHECK(false);

View File

@ -22,14 +22,14 @@
namespace limonp {
// Base class that disables copying for every class deriving from it.
// The copy constructor and copy assignment are private and never defined;
// construction and destruction are protected so the class is only usable
// as a base.
class NonCopyable {
protected:
    NonCopyable() {
    }
    ~NonCopyable() {
    }
private:
    NonCopyable(const NonCopyable&);
    const NonCopyable& operator=(const NonCopyable&);
}; // class NonCopyable
} // namespace limonp

View File

@ -51,123 +51,123 @@ namespace std {
// Streams a vector as "[e0, e1, ...]"; an empty vector prints "[]".
// Each element is written with its own operator<<.
template<typename T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& v) {
    if(v.empty()) {
        return os << "[]";
    }
    os << "[" << v[0];
    for(size_t i = 1; i < v.size(); i++) {
        os << ", " << v[i];
    }
    return os << "]";
}
template<>
inline ostream& operator << (ostream& os, const vector<string>& v) {
if(v.empty()) {
return os << "[]";
}
os<<"[\""<<v[0];
for(size_t i = 1; i < v.size(); i++) {
os<<"\", \""<<v[i];
}
os<<"\"]";
return os;
if(v.empty()) {
return os << "[]";
}
os << "[\"" << v[0];
for(size_t i = 1; i < v.size(); i++) {
os << "\", \"" << v[i];
}
os << "\"]";
return os;
}
// Streams a deque as ["e0", "e1", ...]; an empty deque prints "[]".
// Note that every element is wrapped in double quotes regardless of T.
template<typename T>
std::ostream& operator << (std::ostream& os, const std::deque<T>& dq) {
    if(dq.empty()) {
        return os << "[]";
    }
    os << "[\"" << dq[0];
    for(size_t i = 1; i < dq.size(); i++) {
        os << "\", \"" << dq[i];
    }
    return os << "\"]";
}
// Streams a pair as "first:second".
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::pair<T1, T2>& pr) {
    return os << pr.first << ":" << pr.second;
}
// Formats obj through a stringstream (using obj's ostream operator<<) and
// ASSIGNS the text to str. Existing contents of str are replaced, not
// appended to. Returns str.
template<class T>
std::string& operator << (std::string& str, const T& obj) {
    std::stringstream ss;
    ss << obj;
    return str = ss.str();
}
// Streams a map as "{k0:v0, k1:v1, ...}"; an empty map prints "{}".
// Each entry is written through the pair operator<<.
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::map<T1, T2>& mp) {
    if(mp.empty()) {
        os << "{}";
        return os;
    }
    os << '{';
    typename std::map<T1, T2>::const_iterator it = mp.begin();
    os << *it;
    for(++it; it != mp.end(); ++it) {
        os << ", " << *it;
    }
    os << '}';
    return os;
}
// Streams an unordered_map as "{k0:v0, k1:v1, ...}" in the container's
// iteration order; an empty map prints "{}". Each entry is written through
// the pair operator<<.
template<class T1, class T2>
std::ostream& operator << (std::ostream& os, const std::unordered_map<T1, T2>& mp) {
    if(mp.empty()) {
        return os << "{}";
    }
    os << '{';
    typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
    os << *it;
    for(++it; it != mp.end(); ++it) {
        os << ", " << *it;
    }
    return os << '}';
}
// Streams a set as "{e0, e1, ...}"; an empty set prints "{}".
template<class T>
std::ostream& operator << (std::ostream& os, const std::set<T>& st) {
    if(st.empty()) {
        os << "{}";
        return os;
    }
    os << '{';
    typename std::set<T>::const_iterator it = st.begin();
    os << *it;
    for(++it; it != st.end(); ++it) {
        os << ", " << *it;
    }
    os << '}';
    return os;
}
// True when key is present in contain. Works with any container that
// offers find() and end() (e.g. set, map, unordered_map).
template<class KeyType, class ContainType>
bool IsIn(const ContainType& contain, const KeyType& key) {
    return contain.end() != contain.find(key);
}
// Reads everything remaining in ifs and assigns it to s (previous
// contents of s are replaced). Returns s.
template<class T>
std::basic_string<T> & operator << (std::basic_string<T> & s, std::ifstream & ifs) {
    return s.assign((std::istreambuf_iterator<T>(ifs)), std::istreambuf_iterator<T>());
}
// Writes the characters of s to ofs through an ostreambuf_iterator
// (no formatting is applied). Returns ofs.
template<class T>
std::ofstream & operator << (std::ofstream & ofs, const std::basic_string<T>& s) {
    std::ostreambuf_iterator<T> itr(ofs);
    std::copy(s.begin(), s.end(), itr);
    return ofs;
}
} // namespace std

View File

@ -44,339 +44,339 @@
namespace limonp {
using namespace std;
// printf-style formatting into a std::string.
// Starts with a 256-byte buffer and retries with a larger one until the
// formatted text fits, then returns it without the terminating NUL.
inline std::string StringFormat(const char* fmt, ...) {
    int size = 256;
    std::string str;
    va_list ap;
    while(true) {
        str.resize(size);
        // The argument list must be restarted for every attempt because
        // vsnprintf consumes it.
        va_start(ap, fmt);
        // Write through &str[0] (mutable storage) rather than casting away
        // the const of c_str().
        const int n = vsnprintf(&str[0], size, fmt, ap);
        va_end(ap);
        if(n > -1 && n < size) {
            str.resize(n);
            return str;
        }
        // n >= size: exact space needed is known; n < 0: unknown, so double.
        size = (n > -1) ? n + 1 : size * 2;
    }
}
// Concatenates the elements of [begin, end), separated by connector, into
// res. Elements are formatted with their ostream operator<<.
// An empty range leaves res untouched.
template<class T>
void Join(T begin, T end, std::string& res, const std::string& connector) {
    if(begin == end) {
        return;
    }
    std::stringstream ss;
    ss << *begin;
    for(++begin; begin != end; ++begin) {
        ss << connector << *begin;
    }
    res = ss.str();
}
// Convenience overload: returns the joined text instead of writing to an
// out-parameter. An empty range yields an empty string.
template<class T>
std::string Join(T begin, T end, const std::string& connector) {
    std::string res;
    Join(begin, end, res, connector);
    return res;
}
// Upper-cases str in place (per byte, via toupper) and returns it.
inline std::string& Upper(std::string& str) {
    for(std::string::iterator it = str.begin(); it != str.end(); ++it) {
        // toupper requires a value representable as unsigned char; a plain
        // (possibly negative) char must be converted first.
        *it = static_cast<char>(toupper(static_cast<unsigned char>(*it)));
    }
    return str;
}
// Lower-cases str in place (per byte, via tolower) and returns it.
inline std::string& Lower(std::string& str) {
    for(std::string::iterator it = str.begin(); it != str.end(); ++it) {
        // tolower requires a value representable as unsigned char; a plain
        // (possibly negative) char must be converted first.
        *it = static_cast<char>(tolower(static_cast<unsigned char>(*it)));
    }
    return str;
}
// True when c is a whitespace character. Values above 0xff are rejected
// before calling std::isspace, because passing a large int to isspace was
// observed to crash.
inline bool IsSpace(unsigned c) {
    if(c > 0xff) {
        return false;
    }
    return std::isspace(c & 0xff) != 0;
}

// Removes leading whitespace from s in place and returns s.
// Written as a plain loop: std::ptr_fun/std::not1 were removed in C++17.
inline std::string& LTrim(std::string &s) {
    std::string::iterator it = s.begin();
    // char -> unsigned keeps the historical behaviour: negative chars become
    // values above 0xff and are therefore never treated as whitespace.
    while(it != s.end() && IsSpace(static_cast<unsigned>(*it))) {
        ++it;
    }
    s.erase(s.begin(), it);
    return s;
}

// Removes trailing whitespace from s in place and returns s.
inline std::string& RTrim(std::string &s) {
    std::string::iterator it = s.end();
    while(it != s.begin() && IsSpace(static_cast<unsigned>(*(it - 1)))) {
        --it;
    }
    s.erase(it, s.end());
    return s;
}

// Removes whitespace from both ends of s in place and returns s.
inline std::string& Trim(std::string &s) {
    return LTrim(RTrim(s));
}
// Removes every leading occurrence of x from s in place and returns s.
// Written as a plain loop: std::bind2nd/std::not1 were removed in C++17.
inline std::string& LTrim(std::string & s, char x) {
    std::string::iterator it = s.begin();
    while(it != s.end() && *it == x) {
        ++it;
    }
    s.erase(s.begin(), it);
    return s;
}

// Removes every trailing occurrence of x from s in place and returns s.
inline std::string& RTrim(std::string & s, char x) {
    std::string::iterator it = s.end();
    while(it != s.begin() && *(it - 1) == x) {
        --it;
    }
    s.erase(it, s.end());
    return s;
}

// Removes x from both ends of s in place and returns s.
inline std::string& Trim(std::string &s, char x) {
    return LTrim(RTrim(s, x), x);
}
// Splits src at any character contained in pattern and stores the pieces
// in res (res is cleared first).
//  - pattern is a SET of delimiter characters (find_first_of), not a
//    multi-character separator.
//  - After maxsplit pieces have been produced, the rest of src is appended
//    as one final piece.
//  - A delimiter at the very end of src does not produce a trailing empty
//    piece; an empty src produces no pieces.
inline void Split(const std::string& src, std::vector<std::string>& res, const std::string& pattern, size_t maxsplit = std::string::npos) {
    res.clear();
    size_t start = 0;
    while(start < src.size()) {
        const size_t end = src.find_first_of(pattern, start);
        if(std::string::npos == end || res.size() >= maxsplit) {
            res.push_back(src.substr(start));
            return;
        }
        res.push_back(src.substr(start, end - start));
        start = end + 1;
    }
}
inline vector<string> Split(const string& src, const string& pattern, size_t maxsplit = string::npos) {
vector<string> res;
Split(src, res, pattern, maxsplit);
return res;
vector<string> res;
Split(src, res, pattern, maxsplit);
return res;
}
// True when str begins with prefix (an empty prefix always matches).
inline bool StartsWith(const std::string& str, const std::string& prefix) {
    return prefix.length() <= str.length()
           && 0 == str.compare(0, prefix.length(), prefix);
}
// True when str ends with suffix (an empty suffix always matches).
inline bool EndsWith(const std::string& str, const std::string& suffix) {
    // The length check also keeps the subtraction below from wrapping.
    return suffix.length() <= str.length()
           && 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix);
}
// True when ch occurs anywhere in str.
inline bool IsInStr(const std::string& str, char ch) {
    return str.find(ch) != std::string::npos;
}
// Packs two bytes into one 16-bit value: high becomes the upper byte and
// low the lower byte. Masking with 0x00ff discards the sign extension that
// a negative char would otherwise introduce.
inline uint16_t TwocharToUint16(char high, char low) {
    const uint16_t upper = uint16_t(high) & 0x00ff;
    const uint16_t lower = uint16_t(low) & 0x00ff;
    return (upper << 8) | lower;
}
// Decodes len bytes of UTF-8 at str into 16-bit code units appended to vec
// (vec is cleared first, unless str is NULL).
// Handles 1-, 2- and 3-byte sequences only; returns false for a NULL str,
// for a 4-byte (or longer) lead byte, and for a sequence cut off by the end
// of the input. vec then holds what was decoded up to that point.
// NOTE: continuation bytes are not validated, and any byte in 0x80..0xdf is
// accepted as a 2-byte lead.
template <class Uint16Container>
bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
    if(!str) {
        return false;
    }
    vec.clear();
    size_t i = 0;
    while(i < len) {
        // Work on unsigned bytes so that shifts never see a negative value.
        const uint8_t lead = static_cast<uint8_t>(str[i]);
        if(lead < 0x80) { // 0xxxxxxx
            vec.push_back(static_cast<uint16_t>(lead));
            i += 1;
        } else if(lead <= 0xdf && i + 1 < len) { // 110xxxxx 10xxxxxx
            const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
            const uint8_t hi = (lead >> 2) & 0x07;
            const uint8_t lo = static_cast<uint8_t>((b1 & 0x3f) | ((lead & 0x03) << 6));
            vec.push_back(static_cast<uint16_t>((hi << 8) | lo));
            i += 2;
        } else if(lead <= 0xef && i + 2 < len) { // 1110xxxx 10xxxxxx 10xxxxxx
            const uint8_t b1 = static_cast<uint8_t>(str[i + 1]);
            const uint8_t b2 = static_cast<uint8_t>(str[i + 2]);
            const uint8_t hi = static_cast<uint8_t>((lead << 4) | ((b1 >> 2) & 0x0f));
            const uint8_t lo = static_cast<uint8_t>(((b1 << 6) & 0xc0) | (b2 & 0x3f));
            vec.push_back(static_cast<uint16_t>((hi << 8) | lo));
            i += 3;
        } else {
            return false;
        }
    }
    return true;
}
// Convenience overload: decodes the whole std::string by forwarding to the
// pointer/length overload.
template <class Uint16Container>
bool Utf8ToUnicode(const std::string& str, Uint16Container& vec) {
    return Utf8ToUnicode(str.c_str(), str.size(), vec);
}
// Decodes UTF-8 in str into 32-bit code points appended to vec (vec is
// cleared first). Handles 1- to 4-byte sequences.
// Returns false when a lead byte is above 0xf7 or a sequence is cut off by
// the end of the string; vec then holds what was decoded so far.
// NOTE: continuation bytes are not validated, only masked with 0x3f.
template <class Uint32Container>
bool Utf8ToUnicode32(const std::string& str, Uint32Container& vec) {
    vec.clear();
    const size_t n = str.size();
    size_t i = 0;
    while(i < n) {
        const uint8_t lead = static_cast<uint8_t>(str[i]);
        size_t extra;   // number of continuation bytes that follow
        uint32_t cp;    // payload bits taken from the lead byte
        if(lead < 0x80) {                         // 0xxxxxxx: 7 bits
            extra = 0;
            cp = lead & 0x7f;
        } else if(lead <= 0xdf && i + 1 < n) {    // 110xxxxx: 5 bits
            extra = 1;
            cp = lead & 0x1f;
        } else if(lead <= 0xef && i + 2 < n) {    // 1110xxxx: 4 bits
            extra = 2;
            cp = lead & 0x0f;
        } else if(lead <= 0xf7 && i + 3 < n) {    // 11110xxx: 3 bits
            extra = 3;
            cp = lead & 0x07;
        } else {
            return false;
        }
        // Every continuation byte contributes its low 6 bits.
        for(size_t k = 1; k <= extra; k++) {
            cp <<= 6;
            cp |= static_cast<uint8_t>(str[i + k]) & 0x3f;
        }
        vec.push_back(cp);
        i += extra + 1;
    }
    return true;
}
// Encodes the 32-bit code points in [begin, end) as UTF-8 into res (res is
// cleared first). Values up to 0x7f take 1 byte, up to 0x7ff 2 bytes, up to
// 0xffff 3 bytes and everything larger 4 bytes.
template <class Uint32ContainerConIter>
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, std::string& res) {
    res.clear();
    for(; begin != end; ++begin) {
        const uint32_t ui = *begin;
        if(ui <= 0x7f) {
            res += char(ui);
        } else if(ui <= 0x7ff) {
            res += char(((ui >> 6) & 0x1f) | 0xc0);
            res += char((ui & 0x3f) | 0x80);
        } else if(ui <= 0xffff) {
            res += char(((ui >> 12) & 0x0f) | 0xe0);
            res += char(((ui >> 6) & 0x3f) | 0x80);
            res += char((ui & 0x3f) | 0x80);
        } else {
            // A 4-byte lead carries THREE payload bits (11110xxx). Masking
            // with 0x03 dropped bit 20 and corrupted U+100000..U+10FFFF.
            res += char(((ui >> 18) & 0x07) | 0xf0);
            res += char(((ui >> 12) & 0x3f) | 0x80);
            res += char(((ui >> 6) & 0x3f) | 0x80);
            res += char((ui & 0x3f) | 0x80);
        }
    }
}
// Encodes the 16-bit code units in [begin, end) as UTF-8 into res (res is
// cleared first). Values up to 0x7f take 1 byte, up to 0x7ff 2 bytes and
// everything else 3 bytes.
template <class Uint16ContainerConIter>
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
    res.clear();
    for(; begin != end; ++begin) {
        const uint16_t ui = *begin;
        if(ui <= 0x7f) {
            res += char(ui);
        } else if(ui <= 0x7ff) {
            res += char(((ui >> 6) & 0x1f) | 0xc0);
            res += char((ui & 0x3f) | 0x80);
        } else {
            res += char(((ui >> 12) & 0x0f) | 0xe0);
            res += char(((ui >> 6) & 0x3f) | 0x80);
            res += char((ui & 0x3f) | 0x80);
        }
    }
}
// Converts len bytes of double-byte text at str into 16-bit units in vec
// (vec is always cleared first). A byte without the high bit becomes one
// unit; a byte with the high bit is combined with the byte after it as
// (first << 8) | second.
// Returns true for a NULL str (vec stays empty) and false when a high-bit
// byte is the last byte of the input.
template <class Uint16Container>
bool GBKTrans(const char* const str, size_t len, Uint16Container& vec) {
    vec.clear();
    if(!str) {
        return true;
    }
    size_t i = 0;
    while(i < len) {
        if(0 == (str[i] & 0x80)) {
            vec.push_back(uint16_t(str[i]));
            i += 1;
            continue;
        }
        if(i + 1 >= len) {
            // Lead byte without its trail byte.
            return false;
        }
        const uint16_t hi = uint16_t(str[i]) & 0x00ff;
        const uint16_t lo = uint16_t(str[i + 1]) & 0x00ff;
        vec.push_back(static_cast<uint16_t>((hi << 8) | lo));
        i += 2;
    }
    return true;
}
// Convenience overload: converts the whole std::string by forwarding to
// the pointer/length overload.
template <class Uint16Container>
bool GBKTrans(const std::string& str, Uint16Container& vec) {
    return GBKTrans(str.c_str(), str.size(), vec);
}
// Converts the 16-bit units in [begin, end) back into bytes in res (res is
// cleared first). A unit whose upper byte has the high bit set is written
// as two bytes (upper, lower); any other unit contributes only its lower
// byte.
template <class Uint16ContainerConIter>
void GBKTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, std::string& res) {
    res.clear();
    for(; begin != end; ++begin) {
        const char upper = ((*begin) >> 8) & 0x00ff;
        const char lower = (*begin) & 0x00ff;
        if(upper & 0x80) {
            res += upper;
        }
        res += lower;
    }
}
/*
 * Formats the current local time with strftime and stores it in timeStr.
 * format example: "%Y-%m-%d %H:%M:%S"
 * The result is limited to 63 characters; if strftime reports 0 (empty or
 * oversized result) or the local time cannot be obtained, timeStr is left
 * empty.
 */
inline void GetTime(const std::string& format, std::string& timeStr) {
    time_t timeNow;
    time(&timeNow);
    const struct tm* local = localtime(&timeNow);
    if(local == NULL) {
        // strftime must not be handed a null tm pointer.
        timeStr.clear();
        return;
    }
    timeStr.resize(64);
    // Write through &timeStr[0] (mutable storage) rather than casting away
    // the const of c_str().
    const size_t len = strftime(&timeStr[0], timeStr.size(), format.c_str(), local);
    timeStr.resize(len);
}
// Joins two path fragments with exactly one '/' between them when path1
// already ends in '/', otherwise inserts one. path2 is appended unchanged.
inline std::string PathJoin(const std::string& path1, const std::string& path2) {
    const bool hasSlash = !path1.empty() && path1[path1.size() - 1] == '/';
    if(hasSlash) {
        return path1 + path2;
    }
    return path1 + "/" + path2;
}
}

View File

@ -25,36 +25,36 @@
namespace limonp {
// Minimal pthread wrapper. Derive from it, implement Run(), then call
// Start() once to launch the thread and Join() once to wait for it.
// If the object is destroyed after Start() without Join(), the thread is
// detached.
class IThread: NonCopyable {
public:
    IThread(): isStarted(false), isJoined(false) {
    }
    virtual ~IThread() {
        if(isStarted && !isJoined) {
            XCHECK(!pthread_detach(thread_));
        }
    }

    // Body of the thread; executed on the new thread by Worker().
    virtual void Run() = 0;

    // Launches the thread. Must not be called more than once.
    void Start() {
        XCHECK(!isStarted);
        XCHECK(!pthread_create(&thread_, NULL, Worker, this));
        isStarted = true;
    }

    // Waits for the thread to finish. Must not be called more than once.
    void Join() {
        XCHECK(!isJoined);
        XCHECK(!pthread_join(thread_, NULL));
        isJoined = true;
    }
private:
    // pthread entry point: data is the IThread passed to pthread_create.
    static void * Worker(void * data) {
        IThread * ptr = static_cast<IThread*>(data);
        ptr->Run();
        return NULL;
    }

    pthread_t thread_;
    bool isStarted;
    bool isJoined;
}; // class IThread
} // namespace limonp

View File

@ -30,73 +30,73 @@ using namespace std;
//class ThreadPool;
class ThreadPool: NonCopyable {
public:
class Worker: public IThread {
public:
Worker(ThreadPool* pool): ptThreadPool_(pool) {
assert(ptThreadPool_);
}
virtual ~Worker() {
}
virtual void Run() {
while (true) {
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
if (closure == NULL) {
break;
public:
class Worker: public IThread {
public:
Worker(ThreadPool* pool): ptThreadPool_(pool) {
assert(ptThreadPool_);
}
try {
closure->Run();
} catch(std::exception& e) {
XLOG(ERROR) << e.what();
} catch(...) {
XLOG(ERROR) << " unknown exception.";
virtual ~Worker() {
}
delete closure;
}
}
private:
ThreadPool * ptThreadPool_;
}; // class Worker
ThreadPool(size_t thread_num)
: threads_(thread_num),
queue_(thread_num) {
assert(thread_num);
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i] = new Worker(this);
}
}
~ThreadPool() {
Stop();
}
virtual void Run() {
while(true) {
ClosureInterface* closure = ptThreadPool_->queue_.Pop();
if(closure == NULL) {
break;
}
try {
closure->Run();
} catch(std::exception& e) {
XLOG(ERROR) << e.what();
} catch(...) {
XLOG(ERROR) << " unknown exception.";
}
delete closure;
}
}
private:
ThreadPool * ptThreadPool_;
}; // class Worker
void Start() {
for(size_t i = 0; i < threads_.size(); i++) {
threads_[i]->Start();
ThreadPool(size_t thread_num)
: threads_(thread_num),
queue_(thread_num) {
assert(thread_num);
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i] = new Worker(this);
}
}
}
void Stop() {
for(size_t i = 0; i < threads_.size(); i ++) {
queue_.Push(NULL);
~ThreadPool() {
Stop();
}
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i]->Join();
delete threads_[i];
void Start() {
for(size_t i = 0; i < threads_.size(); i++) {
threads_[i]->Start();
}
}
void Stop() {
for(size_t i = 0; i < threads_.size(); i ++) {
queue_.Push(NULL);
}
for(size_t i = 0; i < threads_.size(); i ++) {
threads_[i]->Join();
delete threads_[i];
}
threads_.clear();
}
threads_.clear();
}
void Add(ClosureInterface* task) {
assert(task);
queue_.Push(task);
}
void Add(ClosureInterface* task) {
assert(task);
queue_.Push(task);
}
private:
friend class Worker;
private:
friend class Worker;
vector<IThread*> threads_;
BoundedBlockingQueue<ClosureInterface*> queue_;
vector<IThread*> threads_;
BoundedBlockingQueue<ClosureInterface*> queue_;
}; // class ThreadPool
} // namespace limonp

View File

@ -35,10 +35,16 @@ INSTALLS += \
# Default rules for deployment.
unix {
target.path = /usr/lib
target.path = $$[QT_INSTALL_LIBS]
}
!isEmpty(target.path): INSTALLS += target
header.path = /usr/include/chinese-seg/
header.files += *.h
headercppjieba.path = /usr/include/chinese-seg/cppjieba/
headercppjieba.files = cppjieba/*
INSTALLS += header headercppjieba
#DISTFILES += \
# jiaba/jieba.pri

View File

@ -30,19 +30,18 @@ break;
println("+---------------------------------------------------------------+");
//read a line from a command line.
// Copies characters from fp into __dst up to (not including) the next '\n'
// or EOF and NUL-terminates the result. Returns NULL only when EOF is hit
// before any character was stored; otherwise returns __dst.
// NOTE: the write is not bounded, so __dst must be large enough for the
// longest line the caller expects.
static fstring getLine(FILE *fp, fstring __dst) {
    // "register" is dropped: it has no effect and is ill-formed in C++17.
    int c;
    fstring cs = __dst;

    while((c = getc(fp)) != EOF) {
        if(c == '\n') break;
        *cs++ = c;
    }
    *cs = '\0';

    return (c == EOF && cs == __dst) ? NULL : __dst;
}
/*static void printcode( fstring str ) {
@ -56,8 +55,7 @@ static fstring getLine( FILE *fp, fstring __dst )
}*/
//int friso_test(int argc, char **argv)
int friso_test()
{
int friso_test() {
clock_t s_time, e_time;
char line[__INPUT_LENGTH__] = {0};
@ -76,7 +74,7 @@ int friso_test()
// }
__path__ = "/usr/share/ukui-search/res/friso.ini";
if ( __path__ == NULL ) {
if(__path__ == NULL) {
println("Usage: friso -init lexicon path");
exit(0);
}
@ -90,12 +88,12 @@ int friso_test()
friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
friso_set_dic( friso, dic );
friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
if(friso_init_from_ifile(friso, config, __path__) != 1) {
printf("fail to initialize friso and config.\n");
goto err;
}
switch ( config->mode ) {
switch(config->mode) {
case __FRISO_SIMPLE_MODE__:
mode = "Simple";
break;
@ -114,29 +112,29 @@ int friso_test()
e_time = clock();
printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
printf("Initialized in %fsec\n", (double)(e_time - s_time) / CLOCKS_PER_SEC);
printf("Mode: %s\n", mode);
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK");
___ABOUT___;
//set the task.
task = friso_new_task();
while ( 1 ) {
while(1) {
print("friso>> ");
getLine( stdin, line );
getLine(stdin, line);
//exit the program
if (strcasecmp( line, "quit") == 0) {
if(strcasecmp(line, "quit") == 0) {
___EXIT_INFO___
}
//for ( i = 0; i < 1000000; i++ ) {
//set the task text.
friso_set_text( task, line );
friso_set_text(task, line);
println("分词结果:");
s_time = clock();
while ( ( config->next_token( friso, config, task ) ) != NULL ) {
while((config->next_token(friso, config, task)) != NULL) {
printf(
"%s[%d, %d, %d] ",
task->token->word,
@ -148,7 +146,7 @@ int friso_test()
}
//}
e_time = clock();
printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );
printf("\nDone, cost < %fsec\n", ((double)(e_time - s_time)) / CLOCKS_PER_SEC);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
/*
* main interface file for friso tokenizer.
* you could modify it and re-release and free for commercial use.
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
@ -25,7 +25,7 @@
/*
* Type: friso_lex_t
* -----------
* This type used to represent the type of the lexicon.
* This type used to represent the type of the lexicon.
*/
typedef enum {
__LEX_CJK_WORDS__ = 0,
@ -47,7 +47,7 @@ typedef enum {
} friso_lex_t;
typedef friso_hash_t * friso_dic_t;
#define __FRISO_LEXICON_LENGTH__ 12
#define __FRISO_LEXICON_LENGTH__ 12
//charset that Friso now support.
@ -59,7 +59,7 @@ typedef enum {
/*
* Type: friso_mode_t
* ------------------
* used to identify the mode that friso uses.
* used to identify the mode that friso uses.
*/
typedef enum {
__FRISO_SIMPLE_MODE__ = 1,
@ -79,7 +79,7 @@ typedef friso_entry * friso_t;
/*
* Type: lex_entry_cdt
* -------------------
* This type used to represent the lexicon entry struct.
* This type used to represent the lexicon entry struct.
*/
#define _LEX_APPENSYN_MASK (1 << 0) //append synoyums words.
#define lex_appensyn_open(e) e->ctrlMask |= _LEX_APPENSYN_MASK
@ -123,7 +123,7 @@ typedef friso_token_entry * friso_token_t;
/*
* Type: friso_task_entry
* This type used to represent the current segmentation content.
* like the text to split, and the current index, token buffer eg....
* like the text to split, and the current index, token buffer eg....
*/
//action control mask for #FRISO_TASK_T#.
#define _TASK_CHECK_CF_MASK (1 << 0) //Wether to check the chinese fraction.
@ -166,9 +166,9 @@ struct friso_config_struct {
friso_mode_t mode; //Complex mode or simple mode
//pointer to the function to get the next token
friso_token_t (*next_token) (friso_t, struct friso_config_struct *, friso_task_t);
friso_token_t (*next_token)(friso_t, struct friso_config_struct *, friso_task_t);
//pointer to the function to get the next cjk lex_entry_t
lex_entry_t (*next_cjk ) (friso_t, struct friso_config_struct *, friso_task_t);
lex_entry_t (*next_cjk)(friso_t, struct friso_config_struct *, friso_task_t);
char kpuncs[_FRISO_KEEP_PUNC_LEN]; //keep punctuations buffer.
};
@ -181,28 +181,28 @@ typedef friso_config_entry * friso_config_t;
* Function: friso_new;
* Usage: vars = friso_new( void );
* --------------------------------
* This function used to create a new empty friso friso_t;
* This function used to create a new empty friso friso_t;
* with default value.
*/
FRISO_API friso_t friso_new( void );
FRISO_API friso_t friso_new(void);
//create a friso entry with default values from a configuration file.
//@return 1 on success and 0 on failure.
FRISO_API int friso_init_from_ifile( friso_t, friso_config_t, fstring );
FRISO_API int friso_init_from_ifile(friso_t, friso_config_t, fstring);
/*
* Function: friso_free;
* Usage: friso_free( vars );
* --------------------------
* This function is used to free the allocation of the given vars.
* This function is used to free the allocation of the given vars.
*/
FRISO_API void friso_free( friso_t );
FRISO_API void friso_free(friso_t);
/*
* Function: friso_set_dic
* Usage: dic = friso_set_dic( vars, dic );
* ----------------------------------------
* This function is used to set the dictionary for friso.
* This function is used to set the dictionary for friso.
* and friso_dic_t is the pointer of a hash table array.
*/
//FRISO_API void friso_set_dic( friso_t, friso_dic_t );
@ -217,14 +217,14 @@ do {\
* ------------------------------------
* This function is used to set the mode(complex or simple) that you want to friso to use.
*/
FRISO_API void friso_set_mode( friso_config_t, friso_mode_t );
FRISO_API void friso_set_mode(friso_config_t, friso_mode_t);
/*create a new friso configuration entry and initialize
/*create a new friso configuration entry and initialize
it with the default value.*/
FRISO_API friso_config_t friso_new_config( void );
FRISO_API friso_config_t friso_new_config(void);
//initialize the specified friso config entry with default value.
FRISO_API void friso_init_config( friso_config_t );
FRISO_API void friso_init_config(friso_config_t);
//free the specified friso configuration entry.
//FRISO_API void friso_free_config( friso_config_t );
@ -234,20 +234,20 @@ FRISO_API void friso_init_config( friso_config_t );
* Function: friso_new_task;
* Usage: segment = friso_new_task( void );
* ----------------------------------------
* This function is used to create a new friso segment type;
* This function is used to create a new friso segment type;
*/
FRISO_API friso_task_t friso_new_task( void );
FRISO_API friso_task_t friso_new_task(void);
/*
* Function: friso_free_task;
* Usage: friso_free_task( task );
* Usage: friso_free_task( task );
* -------------------------------
* This function is used to free the allocation made by friso_new_task();
*/
FRISO_API void friso_free_task( friso_task_t );
FRISO_API void friso_free_task(friso_task_t);
//create a new friso token
FRISO_API friso_token_t friso_new_token( void );
FRISO_API friso_token_t friso_new_token(void);
//free the given friso token
//FRISO_API void friso_free_token( friso_token_t );
@ -257,16 +257,16 @@ FRISO_API friso_token_t friso_new_token( void );
* Function: friso_set_text
* Usage: friso_set_text( task, text );
* ------------------------------------
* This function is used to set the text that is going to segment.
* This function is used to set the text that is going to segment.
*/
FRISO_API void friso_set_text( friso_task_t, fstring );
FRISO_API void friso_set_text(friso_task_t, fstring);
//get the next cjk word with mmseg simple mode
FRISO_API lex_entry_t next_simple_cjk( friso_t, friso_config_t, friso_task_t );
FRISO_API lex_entry_t next_simple_cjk(friso_t, friso_config_t, friso_task_t);
//get the next cjk word with mmseg complex mode(mmseg core algorithm)
FRISO_API lex_entry_t next_complex_cjk( friso_t, friso_config_t, friso_task_t );
FRISO_API lex_entry_t next_complex_cjk(friso_t, friso_config_t, friso_task_t);
/*
* Function: next_mmseg_token
@ -275,10 +275,10 @@ FRISO_API lex_entry_t next_complex_cjk( friso_t, friso_config_t, friso_task_t );
* This function is used to get next word that friso segmented
* with a split mode of __FRISO_SIMPLE_MODE__ or __FRISO_COMPLEX_MODE__
*/
FRISO_API friso_token_t next_mmseg_token( friso_t, friso_config_t, friso_task_t );
FRISO_API friso_token_t next_mmseg_token(friso_t, friso_config_t, friso_task_t);
//__FRISO_DETECT_MODE__
FRISO_API friso_token_t next_detect_token( friso_t, friso_config_t, friso_task_t );
FRISO_API friso_token_t next_detect_token(friso_t, friso_config_t, friso_task_t);
/* }}} friso main interface define :: end*/
/* {{{ lexicon interface define :: start*/
@ -289,42 +289,42 @@ FRISO_API friso_token_t next_detect_token( friso_t, friso_config_t, friso_task_t
* -----------------------------
* This function used to create a new dictionary.(memory allocation).
*/
FRISO_API friso_dic_t friso_dic_new( void );
FRISO_API friso_dic_t friso_dic_new(void);
FRISO_API fstring file_get_line( fstring, FILE * );
FRISO_API fstring file_get_line(fstring, FILE *);
/*
* Function: friso_dic_free
* Usage: friso_dic_free( void );
* ------------------------------
* This function is used to free all the allocation of friso_dic_new.
* This function is used to free all the allocation of friso_dic_new.
*/
FRISO_API void friso_dic_free( friso_dic_t );
FRISO_API void friso_dic_free(friso_dic_t);
//create a new lexicon entry.
FRISO_API lex_entry_t new_lex_entry( fstring, friso_array_t, uint_t, uint_t, uint_t );
FRISO_API lex_entry_t new_lex_entry(fstring, friso_array_t, uint_t, uint_t, uint_t);
//free the given lexicon entry.
//free all the allocations that its synonyms word's items pointed to
//when the second arguments is 1
FRISO_API void free_lex_entry_full( lex_entry_t );
FRISO_API void free_lex_entry( lex_entry_t );
FRISO_API void free_lex_entry_full(lex_entry_t);
FRISO_API void free_lex_entry(lex_entry_t);
/*
* Function: friso_dic_load
* Usage: friso_dic_load( friso, friso_lex_t, path, length );
* Usage: friso_dic_load( friso, friso_lex_t, path, length );
* --------------------------------------------------
* This function is used to load dictionary from a given path.
* no length limit when length less than 0.
*/
FRISO_API void friso_dic_load( friso_t, friso_config_t,
friso_lex_t, fstring, uint_t );
FRISO_API void friso_dic_load(friso_t, friso_config_t,
friso_lex_t, fstring, uint_t);
/*
* load the lexicon configuration file.
* and load all the valid lexicon from the conf file.
*/
FRISO_API void friso_dic_load_from_ifile( friso_t, friso_config_t, fstring, uint_t );
FRISO_API void friso_dic_load_from_ifile(friso_t, friso_config_t, fstring, uint_t);
/*
* Function: friso_dic_add
@ -332,7 +332,7 @@ FRISO_API void friso_dic_load_from_ifile( friso_t, friso_config_t, fstring, uint
* ----------------------------------------------
* This function used to put new word into the dictionary.
*/
FRISO_API void friso_dic_add( friso_dic_t, friso_lex_t, fstring, friso_array_t );
FRISO_API void friso_dic_add(friso_dic_t, friso_lex_t, fstring, friso_array_t);
/*
* Function: friso_dic_add_with_fre
@ -340,15 +340,15 @@ FRISO_API void friso_dic_add( friso_dic_t, friso_lex_t, fstring, friso_array_t )
* -------------------------------------------------------------------
* This function is used to put a new word with its frequency into the dictionary.
*/
FRISO_API void friso_dic_add_with_fre( friso_dic_t, friso_lex_t, fstring, friso_array_t, uint_t );
FRISO_API void friso_dic_add_with_fre(friso_dic_t, friso_lex_t, fstring, friso_array_t, uint_t);
/*
* Function: friso_dic_match
* Usage: result = friso_dic_match( dic, friso_lex_t, word );
* ----------------------------------------------------
* This function is used to check the given word is in the dictionary or not.
* This function is used to check the given word is in the dictionary or not.
*/
FRISO_API int friso_dic_match( friso_dic_t, friso_lex_t, fstring );
FRISO_API int friso_dic_match(friso_dic_t, friso_lex_t, fstring);
/*
* Function: friso_dic_get
@ -356,15 +356,15 @@ FRISO_API int friso_dic_match( friso_dic_t, friso_lex_t, fstring );
* -----------------------------------------
* This function is used to search the specified lex_entry_t.
*/
FRISO_API lex_entry_t friso_dic_get( friso_dic_t, friso_lex_t, fstring );
FRISO_API lex_entry_t friso_dic_get(friso_dic_t, friso_lex_t, fstring);
/*
* Function: friso_spec_dic_size
* Usage: friso_spec_dic_size( dic, friso_lex_t )
* This function is used to get the size of the dictionary with a specified type.
* This function is used to get the size of the dictionary with a specified type.
*/
FRISO_API uint_t friso_spec_dic_size( friso_dic_t, friso_lex_t );
FRISO_API uint_t friso_all_dic_size( friso_dic_t );
FRISO_API uint_t friso_spec_dic_size(friso_dic_t, friso_lex_t);
FRISO_API uint_t friso_all_dic_size(friso_dic_t);
/* }}} lexicon interface define :: end*/
#endif /*end ifndef*/

View File

@ -4,7 +4,7 @@
* 2. hashmap interface.
* 3. dynamic array interface.
* 4. double link list interface.
*
*
* @author chenxin <chenxin619315@gmail.com>
*/
@ -68,45 +68,45 @@ typedef string_buffer_entry * string_buffer_t;
//FRISO_API string_buffer_t new_string_buffer( void );
#define new_string_buffer() \
new_string_buffer_with_opacity( __DEFAULT_ARRAY_LIST_OPACITY__ );
FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t );
FRISO_API string_buffer_t new_string_buffer_with_string( fstring str );
FRISO_API string_buffer_t new_string_buffer_with_opacity(uint_t);
FRISO_API string_buffer_t new_string_buffer_with_string(fstring str);
/*
* this function will copy the chars that the fstring pointed.
* to the buffer.
* this may cause the resize action of the buffer.
*/
FRISO_API void string_buffer_append( string_buffer_t, fstring );
FRISO_API void string_buffer_append_char( string_buffer_t, char );
FRISO_API void string_buffer_append(string_buffer_t, fstring);
FRISO_API void string_buffer_append_char(string_buffer_t, char);
//insert the given fstring from the specified position.
FRISO_API void string_buffer_insert( string_buffer_t, uint_t idx, fstring );
FRISO_API void string_buffer_insert(string_buffer_t, uint_t idx, fstring);
//remove the char in the specified position.
FRISO_API fstring string_buffer_remove( string_buffer_t, uint_t idx, uint_t );
FRISO_API fstring string_buffer_remove(string_buffer_t, uint_t idx, uint_t);
/*
* turn the string_buffer to a string.
* or return the buffer of the string_buffer.
*/
FRISO_API string_buffer_t string_buffer_trim( string_buffer_t );
FRISO_API string_buffer_t string_buffer_trim(string_buffer_t);
/*
* free the given fstring buffer.
* and this function will not free the allocations of the
* and this function will not free the allocations of the
* string_buffer_t->buffer; it is returned to you, and if
* necessary you can free it yourself by calling free();
*/
FRISO_API fstring string_buffer_devote( string_buffer_t );
FRISO_API fstring string_buffer_devote(string_buffer_t);
/*
* clear the given fstring buffer.
* reset its buffer with 0 and reset its length to 0.
*/
FRISO_API void string_buffer_clear( string_buffer_t );
FRISO_API void string_buffer_clear(string_buffer_t);
//free the fstring buffer include the buffer.
FRISO_API void free_string_buffer( string_buffer_t );
FRISO_API void free_string_buffer(string_buffer_t);
/**
* fstring specified chars tokenizer functions
@ -126,28 +126,28 @@ typedef string_split_entry * string_split_t;
* create a new string_split_entry.
*
* @param source
* @return string_split_t;
* @return string_split_t;
*/
FRISO_API string_split_t new_string_split( fstring, fstring );
FRISO_API string_split_t new_string_split(fstring, fstring);
FRISO_API void string_split_reset( string_split_t, fstring, fstring );
FRISO_API void string_split_reset(string_split_t, fstring, fstring);
FRISO_API void string_split_set_source( string_split_t, fstring );
FRISO_API void string_split_set_source(string_split_t, fstring);
FRISO_API void string_split_set_delimiter( string_split_t, fstring );
FRISO_API void string_split_set_delimiter(string_split_t, fstring);
FRISO_API void free_string_split( string_split_t );
FRISO_API void free_string_split(string_split_t);
/**
* get the next split fstring, and copy the
* splited fstring into the __dst buffer .
* get the next split fstring, and copy the
* splited fstring into the __dst buffer .
*
* @param string_split_t
* @param __dst
* @return fstring (NULL if reach the end of the source
* @return fstring (NULL if reach the end of the source
* or there is no more segmentation)
*/
FRISO_API fstring string_split_next( string_split_t, fstring );
FRISO_API fstring string_split_next(string_split_t, fstring);
/* }}} */
@ -170,37 +170,37 @@ typedef friso_array_entry * friso_array_t;
#define new_array_list() new_array_list_with_opacity(__DEFAULT_ARRAY_LIST_OPACITY__)
//create a new friso dynamic array with the given opacity
FRISO_API friso_array_t new_array_list_with_opacity( uint_t );
FRISO_API friso_array_t new_array_list_with_opacity(uint_t);
/*
* free the given friso array.
* and its items, but never where the items's item to pointed to .
* and its items, but never where the items's item to pointed to .
*/
FRISO_API void free_array_list( friso_array_t );
FRISO_API void free_array_list(friso_array_t);
//add a new item to the array.
FRISO_API void array_list_add( friso_array_t, void * );
FRISO_API void array_list_add(friso_array_t, void *);
//insert a new item at a specified position.
FRISO_API void array_list_insert( friso_array_t, uint_t, void * );
FRISO_API void array_list_insert(friso_array_t, uint_t, void *);
//get a item at a specified position.
FRISO_API void *array_list_get( friso_array_t, uint_t );
FRISO_API void *array_list_get(friso_array_t, uint_t);
/*
* set the item at a specified position.
* this will return the old value.
*/
FRISO_API void *array_list_set( friso_array_t, uint_t, void * );
FRISO_API void *array_list_set(friso_array_t, uint_t, void *);
/*
* remove the given item at a specified position.
* this will return the value of the removed item.
*/
FRISO_API void *array_list_remove( friso_array_t, uint_t );
FRISO_API void *array_list_remove(friso_array_t, uint_t);
/*trim the array list for final use.*/
FRISO_API friso_array_t array_list_trim( friso_array_t );
FRISO_API friso_array_t array_list_trim(friso_array_t);
/*
* clear the array list.
@ -208,7 +208,7 @@ FRISO_API friso_array_t array_list_trim( friso_array_t );
* but will not free the point array allocations,
* and will reset the length of it.
*/
FRISO_API friso_array_t array_list_clear( friso_array_t );
FRISO_API friso_array_t array_list_clear(friso_array_t);
//return the size of the array.
//FRISO_API uint_t array_list_size( friso_array_t );
@ -247,10 +247,10 @@ typedef struct {
typedef friso_link_entry * friso_link_t;
//create a new link list
FRISO_API friso_link_t new_link_list( void );
FRISO_API friso_link_t new_link_list(void);
//free the specified link list
FRISO_API void free_link_list( friso_link_t );
FRISO_API void free_link_list(friso_link_t);
//return the size of the current link list.
//FRISO_API uint_t link_list_size( friso_link_t );
@ -261,37 +261,37 @@ FRISO_API void free_link_list( friso_link_t );
#define link_list_empty( link ) (link->size == 0)
//clear all the nodes in the link list( except the head and the tail ).
FRISO_API friso_link_t link_list_clear( friso_link_t link );
FRISO_API friso_link_t link_list_clear(friso_link_t link);
//add a new node to the link list.(append from the tail)
FRISO_API void link_list_add( friso_link_t, void * );
FRISO_API void link_list_add(friso_link_t, void *);
//add a new node before the specified node
FRISO_API void link_list_insert_before( friso_link_t, uint_t, void * );
FRISO_API void link_list_insert_before(friso_link_t, uint_t, void *);
//get the node in the current index.
FRISO_API void *link_list_get( friso_link_t, uint_t );
FRISO_API void *link_list_get(friso_link_t, uint_t);
//modify the node in the current index.
FRISO_API void *link_list_set( friso_link_t, uint_t, void * );
FRISO_API void *link_list_set(friso_link_t, uint_t, void *);
//remove the specified link node
FRISO_API void *link_list_remove( friso_link_t, uint_t );
FRISO_API void *link_list_remove(friso_link_t, uint_t);
//remove the given node
FRISO_API void *link_list_remove_node( friso_link_t, link_node_t );
FRISO_API void *link_list_remove_node(friso_link_t, link_node_t);
//remove the first node from the link list.
FRISO_API void *link_list_remove_first( friso_link_t );
FRISO_API void *link_list_remove_first(friso_link_t);
//remove the last node from the link list
FRISO_API void *link_list_remove_last( friso_link_t );
FRISO_API void *link_list_remove_last(friso_link_t);
//append a node from the end.
FRISO_API void link_list_add_last( friso_link_t, void * );
FRISO_API void link_list_add_last(friso_link_t, void *);
//add a node at the beginning of the link list.
FRISO_API void link_list_add_first( friso_link_t, void * );
FRISO_API void link_list_add_first(friso_link_t, void *);
/* }}} link list interface define::end*/
@ -305,7 +305,7 @@ struct hash_entry {
};
typedef struct hash_entry friso_hash_entry;
typedef friso_hash_entry * hash_entry_t;
typedef void (*fhash_callback_fn_t)( hash_entry_t );
typedef void (*fhash_callback_fn_t)(hash_entry_t);
typedef struct {
uint_t length;
@ -324,10 +324,10 @@ typedef friso_hash_cdt * friso_hash_t;
/*
* Function: new_hash_table
* Usage: table = new_hash_table();
* --------------------------------
* --------------------------------
* this function allocates a new symbol table with no entries.
*/
FRISO_API friso_hash_t new_hash_table( void );
FRISO_API friso_hash_t new_hash_table(void);
/*
* Function: free_hash_table
@ -335,7 +335,7 @@ FRISO_API friso_hash_t new_hash_table( void );
* --------------------------------------
* this function will free all the allocation for memory.
*/
FRISO_API void free_hash_table( friso_hash_t, fhash_callback_fn_t );
FRISO_API void free_hash_table(friso_hash_t, fhash_callback_fn_t);
/*
* Function: put_new_mapping
@ -343,7 +343,7 @@ FRISO_API void free_hash_table( friso_hash_t, fhash_callback_fn_t );
* ----------------------------------------
* the function associates the specified key with the given value.
*/
FRISO_API void *hash_put_mapping( friso_hash_t, fstring, void * );
FRISO_API void *hash_put_mapping(friso_hash_t, fstring, void *);
/*
* Function: is_mapping_exists
@ -351,7 +351,7 @@ FRISO_API void *hash_put_mapping( friso_hash_t, fstring, void * );
* ----------------------------------------------
* this function checks whether a mapping for the given key exists.
*/
FRISO_API int hash_exist_mapping( friso_hash_t, fstring );
FRISO_API int hash_exist_mapping(friso_hash_t, fstring);
/*
* Function: get_mapping_value
@ -360,7 +360,7 @@ FRISO_API int hash_exist_mapping( friso_hash_t, fstring );
* this function return the value associated with the given key.
* UNDEFINED will be returned if the mapping does not exist.
*/
FRISO_API void * hash_get_value( friso_hash_t, fstring );
FRISO_API void * hash_get_value(friso_hash_t, fstring);
/*
* Function: remove_mapping
@ -368,13 +368,13 @@ FRISO_API void * hash_get_value( friso_hash_t, fstring );
* ------------------------------------
* This function is used to remove the mapping associated with the given key.
*/
FRISO_API hash_entry_t hash_remove_mapping( friso_hash_t, fstring );
FRISO_API hash_entry_t hash_remove_mapping(friso_hash_t, fstring);
/*
* Function: get_table_size
* Usage: size = get_table_size( table );
* --------------------------------------
* This function is used to count the size of the specified table.
* This function is used to count the size of the specified table.
*/
//FRISO_API uint_t hash_get_size( friso_hash_t );
#define hash_get_size( hash ) hash->size

View File

@ -15,16 +15,15 @@
*
* @return int the number of bytes of the word that was just read.
*/
FRISO_API int gbk_next_word(
friso_task_t task,
uint_t *idx,
fstring __word )
{
FRISO_API int gbk_next_word(
friso_task_t task,
uint_t *idx,
fstring __word) {
int c;
if ( *idx >= task->length ) return 0;
if(*idx >= task->length) return 0;
c = (uchar_t)task->text[*idx];
if ( c <= 0x80 ) {
if(c <= 0x80) {
task->bytes = 1;
} else {
task->bytes = 2;
@ -46,29 +45,27 @@ FRISO_API int gbk_next_word(
//check if the given buffer is a gbk word (ANSII string).
// included the simplified and traditional words.
FRISO_API int gbk_cn_string(char *str)
{
FRISO_API int gbk_cn_string(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
//GBK/2: gb2312 chinese word.
return ( ((c1 >= 0xb0 && c1 <= 0xf7)
&& (c2 >= 0xa1 && c2 <= 0xfe))
//GBK/3: extend chinese words.
|| ((c1 >= 0x81 && c1 <= 0xa0)
&& ( (c2 >= 0x40 && c2 <= 0x7e)
|| (c2 >= 0x80 && c2 <= 0xfe) ))
//GBK/4: extend chinese words.
|| ((c1 >= 0xaa && c1 <= 0xfe)
&& ( (c2 >= 0x40 && c2 <= 0xfe)
|| (c2 >= 0x80 && c2 <= 0xa0) )) );
return (((c1 >= 0xb0 && c1 <= 0xf7)
&& (c2 >= 0xa1 && c2 <= 0xfe))
//GBK/3: extend chinese words.
|| ((c1 >= 0x81 && c1 <= 0xa0)
&& ((c2 >= 0x40 && c2 <= 0x7e)
|| (c2 >= 0x80 && c2 <= 0xfe)))
//GBK/4: extend chinese words.
|| ((c1 >= 0xaa && c1 <= 0xfe)
&& ((c2 >= 0x40 && c2 <= 0xfe)
|| (c2 >= 0x80 && c2 <= 0xa0))));
}
/*check if the given char is a ASCII letter
* include all the arabic number, letters and english puntuations.*/
FRISO_API int gbk_halfwidth_en_char( char c )
{
FRISO_API int gbk_halfwidth_en_char(char c) {
int u = (uchar_t) c;
return ( u >= 32 && u <= 126 );
return (u >= 32 && u <= 126);
}
/*
@ -76,52 +73,48 @@ FRISO_API int gbk_halfwidth_en_char( char c )
* include the full-width arabic numbers, letters.
* but not the full-width punctuations.
*/
FRISO_API int gbk_fullwidth_en_char( char *str )
{
FRISO_API int gbk_fullwidth_en_char(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
return ( (c1 == 0xA3)
&& ( (c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers.
|| ( c2 >= 0xC1 && c2 <= 0xDA ) //uppercase letters.
|| ( c2 >= 0xE1 && c2 <= 0xFA) ) ); //lowercase letters.
return ((c1 == 0xA3)
&& ((c2 >= 0xB0 && c2 <= 0xB9) //arabic numbers.
|| (c2 >= 0xC1 && c2 <= 0xDA) //uppercase letters.
|| (c2 >= 0xE1 && c2 <= 0xFA))); //lowercase letters.
}
//check if the given char is a upper case english letter.
// included the full-width and half-width letters.
FRISO_API int gbk_uppercase_letter( char *str )
{
FRISO_API int gbk_uppercase_letter(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) { //half-width
return ( c1 >= 65 && c1 <= 90 );
if(c1 <= 0x80) { //half-width
return (c1 >= 65 && c1 <= 90);
} else { //full-width
return ( c1 == 0xa3 && ( c2 >= 0xc1 && c2 <= 0xda ) );
return (c1 == 0xa3 && (c2 >= 0xc1 && c2 <= 0xda));
}
}
//check if the given char is a lower case char.
// included the full-width and half-width letters.
FRISO_API int gbk_lowercase_letter( char *str )
{
FRISO_API int gbk_lowercase_letter(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) { //half-width
return ( c1 >= 97 && c1 <= 122 );
if(c1 <= 0x80) { //half-width
return (c1 >= 97 && c1 <= 122);
} else { //full-width
return ( c1 == 0xa3 && ( c2 >= 0xe1 && c2 <= 0xfa ) );
return (c1 == 0xa3 && (c2 >= 0xe1 && c2 <= 0xfa));
}
}
//check if the given char is a arabic numeric.
// included the full-width and half-width arabic numeric.
FRISO_API int gbk_numeric_letter( char *str )
{
FRISO_API int gbk_numeric_letter(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) { //half-width
return ( c1 >= 48 && c1 <= 57 );
if(c1 <= 0x80) { //half-width
return (c1 >= 48 && c1 <= 57);
} else { //full-width
return ( ( c1 == 0xa3 ) && ( c2 >= 0xb0 && c2 <= 0xb9 ) );
return ((c1 == 0xa3) && (c2 >= 0xb0 && c2 <= 0xb9));
}
}
@ -129,49 +122,47 @@ FRISO_API int gbk_numeric_letter( char *str )
* check if the given fstring is make up with numeric chars.
* both full-width,half-width numeric is ok.
*/
FRISO_API int gbk_numeric_string( char *str )
{
FRISO_API int gbk_numeric_string(char *str) {
char *s = str;
int c1 = 0;
int c2 = 0;
while ( *s != '\0' ) {
c1 = (uchar_t) (*s++);
if ( c1 <= 0x80 ) { //half-width
if ( c1 < 48 || c2 > 57 ) return 0;
while(*s != '\0') {
c1 = (uchar_t)(*s++);
if(c1 <= 0x80) { //half-width
if(c1 < 48 || c2 > 57) return 0;
} else { //full-width
if ( c1 != 0xa3 ) return 0;
c2 = (uchar_t) (*s++);
if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
if(c1 != 0xa3) return 0;
c2 = (uchar_t)(*s++);
if(c2 < 0xb0 || c2 > 0xb9) return 0;
}
}
return 1;
}
FRISO_API int gbk_decimal_string( char *str )
{
FRISO_API int gbk_decimal_string(char *str) {
int c1 = 0;
int c2 = 0;
int len = strlen(str), i, p = 0;
//point header check.
if ( str[0] == '.' || str[len - 1] == '.' ) return 0;
if(str[0] == '.' || str[len - 1] == '.') return 0;
for ( i = 0; i < len; ) {
for(i = 0; i < len;) {
c1 = (uchar_t) str[i++];
//count the number of the points.
if ( c1 == 46 ) {
if(c1 == 46) {
p++;
continue;
}
if ( c1 <= 0x80 ) { //half-width
if ( c1 < 48 || c1 > 57 ) return 0;
if(c1 <= 0x80) { //half-width
if(c1 < 48 || c1 > 57) return 0;
} else { //full-width
if ( c1 != 0xa3 ) return 0;
if(c1 != 0xa3) return 0;
c2 = (uchar_t) str[i++];
if ( c2 < 0xb0 || c2 > 0xb9 ) return 0;
if(c2 < 0xb0 || c2 > 0xb9) return 0;
}
}
@ -180,17 +171,16 @@ FRISO_API int gbk_decimal_string( char *str )
//check if the given char is a english(ASCII) letter.
// (full-width and half-width), not the punctuation/arabic of course.
FRISO_API int gbk_en_letter( char *str )
{
FRISO_API int gbk_en_letter(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) {
return ( (c1 >= 65 && c1 <= 90) //lowercase
|| (c1 >= 97 && c1 <= 122)); //uppercase
if(c1 <= 0x80) {
return ((c1 >= 65 && c1 <= 90) //lowercase
|| (c1 >= 97 && c1 <= 122)); //uppercase
} else {
return ( (c1 == 0xa3)
&& ( ( c2 >= 0xc1 && c2 <= 0xda ) //lowercase
|| ( c2 >= 0xe1 && c2 <= 0xfa ) ) ); //uppercase
return ((c1 == 0xa3)
&& ((c2 >= 0xc1 && c2 <= 0xda) //lowercase
|| (c2 >= 0xe1 && c2 <= 0xfa))); //uppercase
}
return 0;
@ -198,65 +188,60 @@ FRISO_API int gbk_en_letter( char *str )
//check the given char is a whitespace or not.
// included full-width and half-width whitespace.
FRISO_API int gbk_whitespace( char *str )
{
FRISO_API int gbk_whitespace(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
if ( c1 <= 0x80 ) {
if(c1 <= 0x80) {
return (c1 == 32);
} else {
return ( c1 == 0xa3 && c2 == 0xa0 );
return (c1 == 0xa3 && c2 == 0xa0);
}
}
/* check if the given char is a letter number like 'ⅠⅡ'
*/
FRISO_API int gbk_letter_number( char *str )
{
FRISO_API int gbk_letter_number(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
return ( (c1 == 0xa2)
&& ( ( c2 >= 0xa1 && c2 <= 0xb0 ) //lowercase
|| ( c2 >= 0xf0 && c2 <= 0xfe ) ) ); //uppercase
return ((c1 == 0xa2)
&& ((c2 >= 0xa1 && c2 <= 0xb0) //lowercase
|| (c2 >= 0xf0 && c2 <= 0xfe))); //uppercase
}
/*
* check if the given char is a other number like ''
*/
FRISO_API int gbk_other_number( char *str )
{
FRISO_API int gbk_other_number(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
return ( ( c1 == 0xa2 ) && ( c2 >= 0xc5 && c2 <= 0xee ) );
return ((c1 == 0xa2) && (c2 >= 0xc5 && c2 <= 0xee));
}
//check if the given char is a english punctuation.
FRISO_API int gbk_en_punctuation( char c )
{
FRISO_API int gbk_en_punctuation(char c) {
int u = (uchar_t) c;
return ( (u > 32 && u < 48)
|| ( u > 57 && u < 65 )
|| ( u > 90 && u < 97 )
|| ( u > 122 && u < 127 ) );
return ((u > 32 && u < 48)
|| (u > 57 && u < 65)
|| (u > 90 && u < 97)
|| (u > 122 && u < 127));
}
//check the given char is a chinese punctuation.
FRISO_API int gbk_cn_punctuation( char *str )
{
FRISO_API int gbk_cn_punctuation(char *str) {
int c1 = (uchar_t) str[0];
int c2 = (uchar_t) str[1];
//full-width en punctuation.
return ( (c1 == 0xa3 && (( c2 >= 0xa1 && c2 <= 0xaf )
|| ( c2 >= 0xba && c2 <= 0xc0 )
|| ( c2 >= 0xdb && c2 <= 0xe0 )
|| ( c2 >= 0xfb && c2 <= 0xfe ) ))
//chinese punctuation.
|| (c1 == 0xa1 && ( (c2 >= 0xa1 && c2 <= 0xae)
|| ( c2 >= 0xb0 && c2 <= 0xbf ) ))
//A6 area special punctuations:" "
|| (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe))
//A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
|| (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)) );
return ((c1 == 0xa3 && ((c2 >= 0xa1 && c2 <= 0xaf)
|| (c2 >= 0xba && c2 <= 0xc0)
|| (c2 >= 0xdb && c2 <= 0xe0)
|| (c2 >= 0xfb && c2 <= 0xfe)))
//chinese punctuation.
|| (c1 == 0xa1 && ((c2 >= 0xa1 && c2 <= 0xae)
|| (c2 >= 0xb0 && c2 <= 0xbf)))
//A6 area special punctuations:" "
|| (c1 == 0xa6 && (c2 >= 0xf9 && c2 <= 0xfe))
//A8 area special punctuations: " ˊˋ˙–―‥‵℅ "
|| (c1 == 0xa8 && (c2 >= 0x40 && c2 <= 0x47)));
}
/* {{{
@ -292,7 +277,7 @@ FRISO_API int gbk_cn_punctuation( char *str )
/* }}} */
//check if the given english char is a full-width char or not.
//FRISO_API int gbk_fullwidth_char( char *str )
//FRISO_API int gbk_fullwidth_char( char *str )
//{
// return 1;
//}

View File

@ -15,15 +15,14 @@
*
* @return int the bytes of the current readed word.
*/
FRISO_API int utf8_next_word(
friso_task_t task,
uint_t *idx,
fstring __word )
{
if ( *idx >= task->length ) return 0;
FRISO_API int utf8_next_word(
friso_task_t task,
uint_t *idx,
fstring __word) {
if(*idx >= task->length) return 0;
//register uint_t t;
task->bytes = get_utf8_bytes( task->text[ *idx ] );
task->bytes = get_utf8_bytes(task->text[ *idx ]);
//for ( t = 0; t < task->bytes; t++ ) {
// __word[t] = task->text[ (*idx)++ ];
@ -37,7 +36,7 @@ FRISO_API int utf8_next_word(
__word[task->bytes] = '\0';
//the unicode counter was moved here from version 1.6.0
task->unicode = get_utf8_unicode( __word );
task->unicode = get_utf8_unicode(__word);
return task->bytes;
}
@ -47,12 +46,11 @@ FRISO_API int utf8_next_word(
*
* @param int
*/
FRISO_API void print_char_binary( char value )
{
FRISO_API void print_char_binary(char value) {
register uint_t t;
for ( t = 0; t < __CHAR_BYTES__; t++ ) {
if ( ( value & 0x80 ) == 0x80 ) {
for(t = 0; t < __CHAR_BYTES__; t++) {
if((value & 0x80) == 0x80) {
printf("1");
} else {
printf("0");
@ -66,15 +64,14 @@ FRISO_API void print_char_binary( char value )
* between 1 - 6.
*
* @param __char
* @return int
* @return int
*/
FRISO_API int get_utf8_bytes( char value )
{
FRISO_API int get_utf8_bytes(char value) {
register uint_t t = 0;
//one byte ascii char.
if ( ( value & 0x80 ) == 0 ) return 1;
for ( ; ( value & 0x80 ) != 0; value <<= 1 ) {
if((value & 0x80) == 0) return 1;
for(; (value & 0x80) != 0; value <<= 1) {
t++;
}
@ -83,17 +80,16 @@ FRISO_API int get_utf8_bytes( char value )
/*
* get the unicode serial of a utf-8 char.
*
*
* @param ch
* @return int.
*/
FRISO_API int get_utf8_unicode( const fstring ch )
{
int code = 0, bytes = get_utf8_bytes( *ch );
register uchar_t *bit = ( uchar_t * ) &code;
register char b1,b2,b3;
FRISO_API int get_utf8_unicode(const fstring ch) {
int code = 0, bytes = get_utf8_bytes(*ch);
register uchar_t *bit = (uchar_t *) &code;
register char b1, b2, b3;
switch ( bytes ) {
switch(bytes) {
case 1:
*bit = *ch;
break;
@ -102,7 +98,7 @@ FRISO_API int get_utf8_unicode( const fstring ch )
b2 = *(ch + 1);
*bit = (b1 << 6) + (b2 & 0x3F);
*(bit+1) = (b1 >> 2) & 0x07;
*(bit + 1) = (b1 >> 2) & 0x07;
break;
case 3:
b1 = *ch;
@ -110,7 +106,7 @@ FRISO_API int get_utf8_unicode( const fstring ch )
b3 = *(ch + 2);
*bit = (b2 << 6) + (b3 & 0x3F);
*(bit+1) = (b1 << 4) + ((b2 >> 2) & 0x0F);
*(bit + 1) = (b1 << 4) + ((b2 >> 2) & 0x0F);
break;
//ignore the ones that are larger than 3 bytes;
}
@ -119,51 +115,50 @@ FRISO_API int get_utf8_unicode( const fstring ch )
}
//turn the unicode serial to a utf-8 string.
FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
{
if ( u <= 0x0000007F ) {
FRISO_API int unicode_to_utf8(uint_t u, fstring __word) {
if(u <= 0x0000007F) {
//U-00000000 - U-0000007F
//0xxxxxxx
*__word = ( u & 0x7F );
*__word = (u & 0x7F);
return 1;
} else if ( u >= 0x00000080 && u <= 0x000007FF ) {
} else if(u >= 0x00000080 && u <= 0x000007FF) {
//U-00000080 - U-000007FF
//110xxxxx 10xxxxxx
*( __word + 1 ) = ( u & 0x3F) | 0x80;
*(__word + 1) = (u & 0x3F) | 0x80;
*__word = ((u >> 6) & 0x1F) | 0xC0;
return 2;
} else if ( u >= 0x00000800 && u <= 0x0000FFFF ) {
} else if(u >= 0x00000800 && u <= 0x0000FFFF) {
//U-00000800 - U-0000FFFF
//1110xxxx 10xxxxxx 10xxxxxx
*( __word + 2 ) = ( u & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 6) & 0x3F) | 0x80;
*(__word + 2) = (u & 0x3F) | 0x80;
*(__word + 1) = ((u >> 6) & 0x3F) | 0x80;
*__word = ((u >> 12) & 0x0F) | 0xE0;
return 3;
} else if ( u >= 0x00010000 && u <= 0x001FFFFF ) {
} else if(u >= 0x00010000 && u <= 0x001FFFFF) {
//U-00010000 - U-001FFFFF
//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 3 ) = ( u & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 12) & 0x3F) | 0x80;
*(__word + 3) = (u & 0x3F) | 0x80;
*(__word + 2) = ((u >> 6) & 0x3F) | 0x80;
*(__word + 1) = ((u >> 12) & 0x3F) | 0x80;
*__word = ((u >> 18) & 0x07) | 0xF0;
return 4;
} else if ( u >= 0x00200000 && u <= 0x03FFFFFF ) {
} else if(u >= 0x00200000 && u <= 0x03FFFFFF) {
//U-00200000 - U-03FFFFFF
//111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 4 ) = ( u & 0x3F) | 0x80;
*( __word + 3 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 12) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 18) & 0x3F) | 0x80;
*(__word + 4) = (u & 0x3F) | 0x80;
*(__word + 3) = ((u >> 6) & 0x3F) | 0x80;
*(__word + 2) = ((u >> 12) & 0x3F) | 0x80;
*(__word + 1) = ((u >> 18) & 0x3F) | 0x80;
*__word = ((u >> 24) & 0x03) | 0xF8;
return 5;
} else if ( u >= 0x04000000 && u <= 0x7FFFFFFF ) {
} else if(u >= 0x04000000 && u <= 0x7FFFFFFF) {
//U-04000000 - U-7FFFFFFF
//1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*( __word + 5 ) = ( u & 0x3F) | 0x80;
*( __word + 4 ) = ((u >> 6) & 0x3F) | 0x80;
*( __word + 3 ) = ((u >> 12) & 0x3F) | 0x80;
*( __word + 2 ) = ((u >> 18) & 0x3F) | 0x80;
*( __word + 1 ) = ((u >> 24) & 0x3F) | 0x80;
*(__word + 5) = (u & 0x3F) | 0x80;
*(__word + 4) = ((u >> 6) & 0x3F) | 0x80;
*(__word + 3) = ((u >> 12) & 0x3F) | 0x80;
*(__word + 2) = ((u >> 18) & 0x3F) | 0x80;
*(__word + 1) = ((u >> 24) & 0x3F) | 0x80;
*__word = ((u >> 30) & 0x01) | 0xFC;
return 6;
}
@ -173,17 +168,17 @@ FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
/*
* check the given char is a CJK char or not.
* 2E80-2EFF CJK
* 2E80-2EFF CJK
* 2F00-2FDF
* 3000-303F CJK --ignore
* 31C0-31EF CJK
* 3200-32FF CJK --ignore.
* 3300-33FF CJK
* 3400-4DBF CJK A
* 3400-4DBF CJK A
* 4DC0-4DFF
* 4E00-9FBF CJK
* 4E00-9FBF CJK
* F900-FAFF CJK
* FE30-FE4F CJK
* FE30-FE4F CJK
* FF00-FFEF ASCII --ignore (as basic latin)
*
* Japanese:
@ -195,9 +190,9 @@ FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
* AC00-D7AF
* 1100-11FF
* 3130-318F
*
*
* @param ch :pointer to the char
* @return int : 1 for yes and 0 for not.
* @return int : 1 for yes and 0 for not.
*/
//Comment one of the following macro define
@ -205,44 +200,42 @@ FRISO_API int unicode_to_utf8( uint_t u, fstring __word )
#define FRISO_CJK_CHK_C
//#define FRISO_CJK_CHK_J
//#define FRISO_CJK_CHK_K
FRISO_API int utf8_cjk_string( uint_t u )
{
FRISO_API int utf8_cjk_string(uint_t u) {
int c = 0, j = 0, k = 0;
//Chinese.
#ifdef FRISO_CJK_CHK_C
c = ( ( u >= 0x4E00 && u <= 0x9FBF )
|| ( u >= 0x2E80 && u <= 0x2EFF ) || ( u >= 0x2F00 && u <= 0x2FDF )
|| ( u >= 0x31C0 && u <= 0x31EF ) //|| ( u >= 0x3200 && u <= 0x32FF )
|| ( u >= 0x3300 && u <= 0x33FF ) //|| ( u >= 0x3400 && u <= 0x4DBF )
|| ( u >= 0x4DC0 && u <= 0x4DFF ) || ( u >= 0xF900 && u <= 0xFAFF )
|| ( u >= 0xFE30 && u <= 0xFE4F ) );
c = ((u >= 0x4E00 && u <= 0x9FBF)
|| (u >= 0x2E80 && u <= 0x2EFF) || (u >= 0x2F00 && u <= 0x2FDF)
|| (u >= 0x31C0 && u <= 0x31EF) //|| ( u >= 0x3200 && u <= 0x32FF )
|| (u >= 0x3300 && u <= 0x33FF) //|| ( u >= 0x3400 && u <= 0x4DBF )
|| (u >= 0x4DC0 && u <= 0x4DFF) || (u >= 0xF900 && u <= 0xFAFF)
|| (u >= 0xFE30 && u <= 0xFE4F));
#endif
//Japanese.
#ifdef FRISO_CJK_CHK_J
j = ( ( u >= 0x3040 && u <= 0x309F )
|| ( u >= 0x30A0 && u <= 0x30FF ) || ( u >= 0x31F0 && u <= 0x31FF ) );
j = ((u >= 0x3040 && u <= 0x309F)
|| (u >= 0x30A0 && u <= 0x30FF) || (u >= 0x31F0 && u <= 0x31FF));
#endif
//Korean
#ifdef FRISO_CJK_CHK_K
k = ( ( u >= 0xAC00 && u <= 0xD7AF )
|| ( u >= 0x1100 && u <= 0x11FF ) || ( u >= 0x3130 && u <= 0x318F ) );
k = ((u >= 0xAC00 && u <= 0xD7AF)
|| (u >= 0x1100 && u <= 0x11FF) || (u >= 0x3130 && u <= 0x318F));
#endif
return ( c || j || k );
return (c || j || k);
}
/*
* check the given char is a Basic Latin letter or not.
* include all the letters and english punctuations.
*
*
* @param c
* @return int 1 for yes and 0 for not.
* @return int 1 for yes and 0 for not.
*/
FRISO_API int utf8_halfwidth_en_char( uint_t u )
{
return ( u >= 32 && u <= 126 );
FRISO_API int utf8_halfwidth_en_char(uint_t u) {
return (u >= 32 && u <= 126);
}
/*
@ -253,44 +246,39 @@ FRISO_API int utf8_halfwidth_en_char( uint_t u )
* @param c
* @return int
*/
FRISO_API int utf8_fullwidth_en_char( uint_t u )
{
return ( (u >= 65296 && u <= 65305 ) //arabic number
|| ( u >= 65313 && u <= 65338 ) //upper case letters
|| ( u >= 65345 && u <= 65370 ) ); //lower case letters
FRISO_API int utf8_fullwidth_en_char(uint_t u) {
return ((u >= 65296 && u <= 65305) //arabic number
|| (u >= 65313 && u <= 65338) //upper case letters
|| (u >= 65345 && u <= 65370)); //lower case letters
}
//check the given char is a upper case letters or not.
// included the full-width and half-width letters.
FRISO_API int utf8_uppercase_letter( uint_t u )
{
if ( u > 65280 ) u -= 65248;
return ( u >= 65 && u <= 90 );
FRISO_API int utf8_uppercase_letter(uint_t u) {
if(u > 65280) u -= 65248;
return (u >= 65 && u <= 90);
}
//check the given char is a upper case letters or not.
// included the full-width and half-width letters.
FRISO_API int utf8_lowercase_letter( uint_t u )
{
if ( u > 65280 ) u -= 65248;
return ( u >= 97 && u <= 122 );
FRISO_API int utf8_lowercase_letter(uint_t u) {
if(u > 65280) u -= 65248;
return (u >= 97 && u <= 122);
}
//check the given char is a numeric
// included the full-width and half-width arabic numeric.
FRISO_API int utf8_numeric_letter( uint_t u )
{
if ( u > 65280 ) u -= 65248; //make full-width half-width.
return ( ( u >= 48 && u <= 57 ) );
FRISO_API int utf8_numeric_letter(uint_t u) {
if(u > 65280) u -= 65248; //make full-width half-width.
return ((u >= 48 && u <= 57));
}
//check the given char is a english letter.(included the full-width)
// not the punctuation of course.
FRISO_API int utf8_en_letter( uint_t u )
{
if ( u > 65280 ) u -= 65248;
return ( ( u >= 65 && u <= 90 )
|| ( u >= 97 && u <= 122 ) );
FRISO_API int utf8_en_letter(uint_t u) {
if(u > 65280) u -= 65248;
return ((u >= 65 && u <= 90)
|| (u >= 97 && u <= 122));
}
/*
@ -310,24 +298,23 @@ FRISO_API int utf8_en_letter( uint_t u )
* 65304,
* 65305,
*/
FRISO_API int utf8_numeric_string( const fstring str )
{
FRISO_API int utf8_numeric_string(const fstring str) {
fstring s = str;
int bytes, u;
while ( *s != '\0' ) {
while(*s != '\0') {
//if ( ! utf8_numeric_letter( get_utf8_unicode( s++ ) ) ) {
// return 0;
//}
//}
//new implemention.
//@date 2013-10-14
bytes = 1;
if ( *s < 0 ) { //full-width chars.
if(*s < 0) { //full-width chars.
u = get_utf8_unicode(s);
bytes = get_utf8_bytes(*s);
if ( u < 65296 || u > 65305 ) return 0;
} else if ( *s < 48 || *s > 57 ) {
if(u < 65296 || u > 65305) return 0;
} else if(*s < 48 || *s > 57) {
return 0;
}
@ -337,25 +324,24 @@ FRISO_API int utf8_numeric_string( const fstring str )
return 1;
}
FRISO_API int utf8_decimal_string( const fstring str )
{
FRISO_API int utf8_decimal_string(const fstring str) {
int len = strlen(str), i, p = 0;
int bytes = 0, u;
if ( str[0] == '.' || str[len-1] == '.' ) return 0;
if(str[0] == '.' || str[len - 1] == '.') return 0;
for ( i = 1; i < len; bytes = 1 ) {
for(i = 1; i < len; bytes = 1) {
//count the number of char '.'
if ( str[i] == '.' ) {
if(str[i] == '.') {
i++;
p++;
continue;
} else if ( str[i] < 0 ) {
} else if(str[i] < 0) {
//full-width numeric.
u = get_utf8_unicode(str+i);
u = get_utf8_unicode(str + i);
bytes = get_utf8_bytes(str[i]);
if ( u < 65296 || u > 65305 ) return 0;
} else if ( str[i] < 48 || str[i] > 57 ) {
if(u < 65296 || u > 65305) return 0;
} else if(str[i] < 48 || str[i] > 57) {
return 0;
}
@ -367,13 +353,12 @@ FRISO_API int utf8_decimal_string( const fstring str )
/*
* check the given char is a whitespace or not.
*
*
* @param ch
* @return int 1 for yes and 0 for not.
* @return int 1 for yes and 0 for not.
*/
FRISO_API int utf8_whitespace( uint_t u )
{
if ( u == 32 || u == 12288 ) {
FRISO_API int utf8_whitespace(uint_t u) {
if(u == 32 || u == 12288) {
return 1;
}
return 0;
@ -382,17 +367,16 @@ FRISO_API int utf8_whitespace( uint_t u )
/*
* check the given char is a english punctuation.
*
*
* @param ch
* @return int
* @return int
*/
FRISO_API int utf8_en_punctuation( uint_t u )
{
FRISO_API int utf8_en_punctuation(uint_t u) {
//if ( u > 65280 ) u = u - 65248; //make full-width half-width
return ( (u > 32 && u < 48)
|| ( u > 57 && u < 65 )
|| ( u > 90 && u < 97 ) //added @2013-08-31
|| ( u > 122 && u < 127 ) );
return ((u > 32 && u < 48)
|| (u > 57 && u < 65)
|| (u > 90 && u < 97) //added @2013-08-31
|| (u > 122 && u < 127));
}
/*
@ -400,17 +384,16 @@ FRISO_API int utf8_en_punctuation( uint_t u )
* @date 2013-08-31 added.
*
* @param ch
* @return int
* @return int
*/
FRISO_API int utf8_cn_punctuation( uint_t u )
{
return ( ( u > 65280 && u < 65296 )
|| ( u > 65305 && u < 65312 )
|| ( u > 65338 && u < 65345 )
|| ( u > 65370 && u < 65382 )
//cjk symbol and punctuation.(added 2013-09-06)
//from http://www.unicode.org/charts/PDF/U3000.pdf
|| ( u >= 12289 && u <= 12319) );
FRISO_API int utf8_cn_punctuation(uint_t u) {
return ((u > 65280 && u < 65296)
|| (u > 65305 && u < 65312)
|| (u > 65338 && u < 65345)
|| (u > 65370 && u < 65382)
//cjk symbol and punctuation.(added 2013-09-06)
//from http://www.unicode.org/charts/PDF/U3000.pdf
|| (u >= 12289 && u <= 12319));
}
/*
@ -419,8 +402,7 @@ FRISO_API int utf8_cn_punctuation( uint_t u )
* @param ch
* @return int
*/
FRISO_API int utf8_letter_number( uint_t u )
{
FRISO_API int utf8_letter_number(uint_t u) {
return 0;
}
@ -430,13 +412,12 @@ FRISO_API int utf8_letter_number( uint_t u )
* @param ch
* @return int
*/
FRISO_API int utf8_other_number( uint_t u )
{
FRISO_API int utf8_other_number(uint_t u) {
return 0;
}
//A macro define has replace this.
//FRISO_API int is_en_punctuation( char c )
//FRISO_API int is_en_punctuation( char c )
//{
// return utf8_en_punctuation( (uint_t) c );
//}
@ -448,9 +429,9 @@ FRISO_API int utf8_other_number( uint_t u )
/* @Deprecated
* check the given char is an english keep punctuation.*/
//FRISO_API int utf8_keep_punctuation( fstring str )
//FRISO_API int utf8_keep_punctuation( fstring str )
//{
// if ( __keep_punctuations_hash__ == NULL )
// if ( __keep_punctuations_hash__ == NULL )
// {
// __keep_punctuations_hash__ = new_hash_table();
// hash_put_mapping( __keep_punctuations_hash__, "@", NULL );
@ -473,11 +454,11 @@ FRISO_API int utf8_other_number( uint_t u )
/*
* check the given english char is a full-width char or not.
*
*
* @param ch
* @return 1 for yes and 0 for not.
* @return 1 for yes and 0 for not.
*/
//FRISO_API int utf8_fullwidth_char( uint_t u )
//FRISO_API int utf8_fullwidth_char( uint_t u )
//{
// if ( u == 12288 )
// return 1; //full-width space

View File

@ -10,16 +10,15 @@
/* ********************************************
* friso array list static functions block *
**********************************************/
__STATIC_API__ void **create_array_entries( uint_t __blocks )
{
__STATIC_API__ void **create_array_entries(uint_t __blocks) {
register uint_t t;
void **block = ( void ** ) FRISO_CALLOC( sizeof( void * ), __blocks );
if ( block == NULL ) {
void **block = (void **) FRISO_CALLOC(sizeof(void *), __blocks);
if(block == NULL) {
___ALLOCATION_ERROR___
}
//initialize
for ( t = 0; t < __blocks; t++ ) {
for(t = 0; t < __blocks; t++) {
block[t] = NULL;
}
@ -27,18 +26,17 @@ __STATIC_API__ void **create_array_entries( uint_t __blocks )
}
//resize the array. (the opacity should not be smaller than array->length)
__STATIC_API__ friso_array_t resize_array_list(
friso_array_t array,
uint_t opacity )
{
__STATIC_API__ friso_array_t resize_array_list(
friso_array_t array,
uint_t opacity) {
register uint_t t;
void **block = create_array_entries( opacity );
void **block = create_array_entries(opacity);
for ( t = 0; t < array->length ; t++ ) {
for(t = 0; t < array->length ; t++) {
block[t] = array->items[t];
}
FRISO_FREE( array->items );
FRISO_FREE(array->items);
array->items = block;
array->allocs = opacity;
@ -55,16 +53,15 @@ __STATIC_API__ friso_array_t resize_array_list(
//}
//create a new array list with a given opacity.
FRISO_API friso_array_t new_array_list_with_opacity( uint_t opacity )
{
friso_array_t array = ( friso_array_t )
FRISO_MALLOC( sizeof( friso_array_entry ) );
if ( array == NULL ) {
FRISO_API friso_array_t new_array_list_with_opacity(uint_t opacity) {
friso_array_t array = (friso_array_t)
FRISO_MALLOC(sizeof(friso_array_entry));
if(array == NULL) {
___ALLOCATION_ERROR___
}
//initialize
array->items = create_array_entries( opacity );
array->items = create_array_entries(opacity);
array->allocs = opacity;
array->length = 0;
@ -73,10 +70,9 @@ FRISO_API friso_array_t new_array_list_with_opacity( uint_t opacity )
/*
* free the given friso array.
* and its items, but never where its items item pointed to .
* and its items, but never where its items item pointed to .
*/
FRISO_API void free_array_list( friso_array_t array )
{
FRISO_API void free_array_list(friso_array_t array) {
//free the allocation that all the items pointed to
//register int t;
//if ( flag == 1 ) {
@ -87,40 +83,38 @@ FRISO_API void free_array_list( friso_array_t array )
// }
//}
FRISO_FREE( array->items );
FRISO_FREE( array );
FRISO_FREE(array->items);
FRISO_FREE(array);
}
//add a new item to the array.
FRISO_API void array_list_add( friso_array_t array, void *value )
{
FRISO_API void array_list_add(friso_array_t array, void *value) {
//check the condition to resize.
if ( array->length == array->allocs ) {
resize_array_list( array, array->length * 2 + 1 );
if(array->length == array->allocs) {
resize_array_list(array, array->length * 2 + 1);
}
array->items[array->length++] = value;
}
//insert a new item at a specified position.
FRISO_API void array_list_insert(
friso_array_t array,
uint_t idx,
void *value )
{
FRISO_API void array_list_insert(
friso_array_t array,
uint_t idx,
void *value) {
register uint_t t;
if ( idx <= array->length ) {
if(idx <= array->length) {
//check the condition to resize the array.
if ( array->length == array->allocs ) {
resize_array_list( array, array->length * 2 + 1 );
if(array->length == array->allocs) {
resize_array_list(array, array->length * 2 + 1);
}
//move the elements after idx.
//for ( t = idx; t < array->length; t++ ) {
// array->items[t+1] = array->items[t];
//}
for ( t = array->length - 1; t >= idx; t-- ) {
array->items[t+1] = array->items[t];
for(t = array->length - 1; t >= idx; t--) {
array->items[t + 1] = array->items[t];
}
array->items[idx] = value;
@ -129,9 +123,8 @@ FRISO_API void array_list_insert(
}
//get the item at a specified position.
FRISO_API void *array_list_get( friso_array_t array, uint_t idx )
{
if ( idx < array->length ) {
FRISO_API void *array_list_get(friso_array_t array, uint_t idx) {
if(idx < array->length) {
return array->items[idx];
}
return NULL;
@ -139,13 +132,12 @@ FRISO_API void *array_list_get( friso_array_t array, uint_t idx )
//set the value of the item at a specified position.
//this will return the old value.
FRISO_API void * array_list_set(
friso_array_t array,
uint_t idx,
void * value )
{
FRISO_API void * array_list_set(
friso_array_t array,
uint_t idx,
void * value) {
void * oval = NULL;
if ( idx < array->length ) {
if(idx < array->length) {
oval = array->items[idx];
array->items[idx] = value;
}
@ -154,16 +146,15 @@ FRISO_API void * array_list_set(
//remove the item at a specified position.
//this will return the value of the removed item.
FRISO_API void * array_list_remove(
friso_array_t array, uint_t idx )
{
FRISO_API void * array_list_remove(
friso_array_t array, uint_t idx) {
register uint_t t;
void *oval = NULL;
if ( idx < array->length ) {
if(idx < array->length) {
oval = array->items[idx];
//move the elements after idx.
for ( t = idx; t < array->length - 1; t++ ) {
for(t = idx; t < array->length - 1; t++) {
array->items[t] = array->items[ t + 1 ];
}
array->items[array->length - 1] = NULL;
@ -174,11 +165,10 @@ FRISO_API void * array_list_remove(
}
/*trim the array list*/
FRISO_API friso_array_t array_list_trim( friso_array_t array )
{
if ( array->length < array->allocs ) {
return resize_array_list( array, array->length );
}
FRISO_API friso_array_t array_list_trim(friso_array_t array) {
if(array->length < array->allocs) {
return resize_array_list(array, array->length);
}
return array;
}
@ -188,20 +178,19 @@ FRISO_API friso_array_t array_list_trim( friso_array_t array )
* but will not free the point array allocations,
* and will reset the length of it.
*/
FRISO_API friso_array_t array_list_clear( friso_array_t array )
{
FRISO_API friso_array_t array_list_clear(friso_array_t array) {
register uint_t t;
//free all the allocations that the array->length's pointer pointed.
for ( t = 0; t < array->length; t++ ) {
for(t = 0; t < array->length; t++) {
/*if ( array->items[t] == NULL ) continue;
FRISO_FREE( array->items[t] ); */
array->items[t] = NULL;
array->items[t] = NULL;
}
//attribute reset.
array->length = 0;
return array;
}
}
//get the size of the array list. (A macro define has replace this.)
//FRISO_API uint_t array_list_size( friso_array_t array ) {
@ -214,7 +203,7 @@ FRISO_API friso_array_t array_list_clear( friso_array_t array )
//}
//check if the array is empty.(A macro define has replace this.)
//FRISO_API int array_list_empty( friso_array_t array )
//FRISO_API int array_list_empty( friso_array_t array )
//{
// return ( array->length == 0 );
//}

View File

@ -1,6 +1,6 @@
/**
* friso string type check functions,
* like english/CJK, full-wdith/half-width, punctuation or not.
* friso string type check functions,
* like english/CJK, full-wdith/half-width, punctuation or not.
* @see friso_UTF8.c and friso_GBK.c for detail.
*
* @author lionsoul<chenxin619315@gmail.com>
@ -13,16 +13,15 @@
#include "friso_API.h"
/* check if the specified string is a cn string.
*
*
* @return int (true for cn string or false)
* */
FRISO_API int friso_cn_string(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
FRISO_API int friso_cn_string(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_cjk_string(task->unicode);
} else if ( charset == FRISO_GBK ) {
} else if(charset == FRISO_GBK) {
return gbk_cn_string(task->buffer);
}
@ -30,13 +29,12 @@ FRISO_API int friso_cn_string(
}
//check if the specified word is a whitespace.
FRISO_API int friso_whitespace(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
FRISO_API int friso_whitespace(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_whitespace(task->unicode);
} else if ( charset == FRISO_GBK ) {
} else if(charset == FRISO_GBK) {
return gbk_whitespace(task->buffer);
}
@ -46,11 +44,10 @@ FRISO_API int friso_whitespace(
//check if the specifiled word is a numeric letter.
FRISO_API int friso_numeric_letter(
friso_charset_t charset,
friso_task_t task)
{
if ( charset == FRISO_UTF8 ) {
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_numeric_letter((uint_t) task->text[task->idx]);
} else if ( charset == FRISO_GBK ) {
} else if(charset == FRISO_GBK) {
return gbk_numeric_letter(task->text + task->idx);
}
@ -58,14 +55,13 @@ FRISO_API int friso_numeric_letter(
}
//check if the specified word is aa english letter.
FRISO_API int friso_en_letter(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
return utf8_en_letter( ( uint_t ) task->text[task->idx]);
} else if ( charset == FRISO_GBK ) {
return gbk_en_letter( task->text + task->idx );
FRISO_API int friso_en_letter(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_en_letter((uint_t) task->text[task->idx]);
} else if(charset == FRISO_GBK) {
return gbk_en_letter(task->text + task->idx);
}
return 0;
@ -73,13 +69,12 @@ FRISO_API int friso_en_letter(
//check if the specified word is a half-width letter.
// punctuations are inclued.
FRISO_API int friso_halfwidth_en_char(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
FRISO_API int friso_halfwidth_en_char(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_halfwidth_en_char(task->unicode);
} else if ( charset == FRISO_GBK ) {
} else if(charset == FRISO_GBK) {
return gbk_halfwidth_en_char(task->buffer[0]);
}
@ -88,14 +83,13 @@ FRISO_API int friso_halfwidth_en_char(
//check if the specified word is a full-width letter.
// full-width punctuations are not included.
FRISO_API int friso_fullwidth_en_char(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
return utf8_fullwidth_en_char( task->unicode );
} else if ( charset == FRISO_GBK ) {
return gbk_fullwidth_en_char( task->buffer );
FRISO_API int friso_fullwidth_en_char(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_fullwidth_en_char(task->unicode);
} else if(charset == FRISO_GBK) {
return gbk_fullwidth_en_char(task->buffer);
}
return 0;
@ -104,49 +98,45 @@ FRISO_API int friso_fullwidth_en_char(
//check if the specified word is an english punctuations.
FRISO_API int friso_en_punctuation(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
return utf8_en_punctuation( task->unicode );
} else if ( charset == FRISO_GBK ) {
return gbk_en_punctuation( task->buffer[0] );
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_en_punctuation(task->unicode);
} else if(charset == FRISO_GBK) {
return gbk_en_punctuation(task->buffer[0]);
}
return 0;
}
//check if the specified word ia sn chinese punctuation.
FRISO_API int friso_cn_punctuation(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
return utf8_cn_punctuation( task->unicode );
} else if ( charset == FRISO_GBK ) {
return gbk_cn_punctuation( task->buffer );
FRISO_API int friso_cn_punctuation(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_cn_punctuation(task->unicode);
} else if(charset == FRISO_GBK) {
return gbk_cn_punctuation(task->buffer);
}
return 0;
}
FRISO_API int friso_letter_number(
friso_charset_t charset,
friso_task_t task )
{
FRISO_API int friso_letter_number(
friso_charset_t charset,
friso_task_t task) {
return 0;
}
FRISO_API int friso_other_number(
friso_charset_t charset,
friso_task_t task )
{
FRISO_API int friso_other_number(
friso_charset_t charset,
friso_task_t task) {
return 0;
}
//check if the word is a keep punctuation.
//@Deprecated
//FRISO_API int friso_keep_punctuation(
// friso_charset_t charset,
//FRISO_API int friso_keep_punctuation(
// friso_charset_t charset,
// friso_task_t task )
//{
// if ( charset == FRISO_UTF8 )
@ -158,40 +148,37 @@ FRISO_API int friso_other_number(
//check if the specified char is en english punctuation.
// this function is the same as friso_en_punctuation.
FRISO_API int is_en_punctuation(
friso_charset_t charset, char c )
{
if ( charset == FRISO_UTF8 ) {
return utf8_en_punctuation( (uint_t) c);
} else if ( charset == FRISO_GBK ) {
return gbk_en_punctuation( c );
FRISO_API int is_en_punctuation(
friso_charset_t charset, char c) {
if(charset == FRISO_UTF8) {
return utf8_en_punctuation((uint_t) c);
} else if(charset == FRISO_GBK) {
return gbk_en_punctuation(c);
}
return 0;
}
//check the specified string is make up with numeric.
FRISO_API int friso_numeric_string(
friso_charset_t charset,
char *buffer )
{
if ( charset == FRISO_UTF8 ) {
return utf8_numeric_string( buffer );
} else if ( charset == FRISO_GBK ) {
return gbk_numeric_string( buffer );
//check the specified string is make up with numeric.
FRISO_API int friso_numeric_string(
friso_charset_t charset,
char *buffer) {
if(charset == FRISO_UTF8) {
return utf8_numeric_string(buffer);
} else if(charset == FRISO_GBK) {
return gbk_numeric_string(buffer);
}
return 0;
}
//check the specified string is a decimal string.
FRISO_API int friso_decimal_string(
friso_charset_t charset, char *buffer )
{
if ( charset == FRISO_UTF8 ) {
return utf8_decimal_string( buffer );
} else if ( charset == FRISO_GBK ) {
return gbk_decimal_string( buffer );
FRISO_API int friso_decimal_string(
friso_charset_t charset, char *buffer) {
if(charset == FRISO_UTF8) {
return utf8_decimal_string(buffer);
} else if(charset == FRISO_GBK) {
return gbk_decimal_string(buffer);
}
return 0;
@ -199,14 +186,13 @@ FRISO_API int friso_decimal_string(
//check if the specified char is english uppercase letter.
// included full-width and half-width letters.
FRISO_API int friso_uppercase_letter(
friso_charset_t charset,
friso_task_t task )
{
if ( charset == FRISO_UTF8 ) {
return utf8_uppercase_letter( task->unicode );
} else if ( charset == FRISO_GBK ) {
return gbk_uppercase_letter( task->buffer );
FRISO_API int friso_uppercase_letter(
friso_charset_t charset,
friso_task_t task) {
if(charset == FRISO_UTF8) {
return utf8_uppercase_letter(task->unicode);
} else if(charset == FRISO_GBK) {
return gbk_uppercase_letter(task->buffer);
}
return 0;
@ -216,27 +202,26 @@ FRISO_API int friso_uppercase_letter(
* the type will be the constants defined above.
* (include the fullwidth english char.)
*/
FRISO_API friso_enchar_t friso_enchar_type(
friso_charset_t charset,
friso_task_t task )
{
FRISO_API friso_enchar_t friso_enchar_type(
friso_charset_t charset,
friso_task_t task) {
//Unicode or ASCII.(Both UTF-8 and GBK are valid)
uint_t u = 0;
if ( charset == FRISO_UTF8 ) {
if(charset == FRISO_UTF8) {
u = task->unicode;
//if ( u >= 65280 ) u = 65280 - 65248;
} else if ( charset == FRISO_GBK ) {
} else if(charset == FRISO_GBK) {
u = (uchar_t)task->buffer[0];
//if ( u == 0xa3 ) ; //full-width.
}
//range check.
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
if ( u == 32 ) return FRISO_EN_WHITESPACE;
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
if(u > 126 || u < 32) return FRISO_EN_UNKNOW;
if(u == 32) return FRISO_EN_WHITESPACE;
if(u >= 48 && u <= 57) return FRISO_EN_NUMERIC;
if(u >= 65 && u <= 90) return FRISO_EN_LETTER;
if(u >= 97 && u <= 122) return FRISO_EN_LETTER;
return FRISO_EN_PUNCTUATION;
}
@ -245,16 +230,15 @@ FRISO_API friso_enchar_t friso_enchar_type(
* the type will be the constants defined above.
* (the char should be half-width english char only)
*/
FRISO_API friso_enchar_t get_enchar_type( char ch )
{
FRISO_API friso_enchar_t get_enchar_type(char ch) {
uint_t u = (uchar_t) ch;
//range check.
if ( u > 126 || u < 32 ) return FRISO_EN_UNKNOW;
if ( u == 32 ) return FRISO_EN_WHITESPACE;
if ( u >= 48 && u <= 57 ) return FRISO_EN_NUMERIC;
if ( u >= 65 && u <= 90 ) return FRISO_EN_LETTER;
if ( u >= 97 && u <= 122 ) return FRISO_EN_LETTER;
if(u > 126 || u < 32) return FRISO_EN_UNKNOW;
if(u == 32) return FRISO_EN_WHITESPACE;
if(u >= 48 && u <= 57) return FRISO_EN_NUMERIC;
if(u >= 65 && u <= 90) return FRISO_EN_LETTER;
if(u >= 97 && u <= 122) return FRISO_EN_LETTER;
return FRISO_EN_PUNCTUATION;
}

View File

@ -19,50 +19,50 @@
/** {{{ wrap interface */
/* check if the specified string is a cn string.
*
*
* @return int (true for cn string or false)
* */
FRISO_API int friso_cn_string( friso_charset_t, friso_task_t );
FRISO_API int friso_cn_string(friso_charset_t, friso_task_t);
//check if the specified word is a whitespace.
FRISO_API int friso_whitespace( friso_charset_t, friso_task_t );
FRISO_API int friso_whitespace(friso_charset_t, friso_task_t);
//check if the specified word is a numeric letter.
FRISO_API int friso_numeric_letter(friso_charset_t, friso_task_t);
//check if the specified word is a english letter.
FRISO_API int friso_en_letter( friso_charset_t, friso_task_t );
FRISO_API int friso_en_letter(friso_charset_t, friso_task_t);
//check if the specified word is a half-width letter.
// punctuations are included.
FRISO_API int friso_halfwidth_en_char( friso_charset_t, friso_task_t );
FRISO_API int friso_halfwidth_en_char(friso_charset_t, friso_task_t);
//check if the specified word is a full-width letter.
// full-width punctuations are not included.
FRISO_API int friso_fullwidth_en_char( friso_charset_t, friso_task_t );
FRISO_API int friso_fullwidth_en_char(friso_charset_t, friso_task_t);
//check if the specified word is an english punctuations.
FRISO_API int friso_en_punctuation( friso_charset_t, friso_task_t );
FRISO_API int friso_en_punctuation(friso_charset_t, friso_task_t);
//check if the specified word is a chinese punctuation.
FRISO_API int friso_cn_punctuation( friso_charset_t, friso_task_t );
FRISO_API int friso_cn_punctuation(friso_charset_t, friso_task_t);
FRISO_API int friso_letter_number( friso_charset_t, friso_task_t );
FRISO_API int friso_other_number( friso_charset_t, friso_task_t );
FRISO_API int friso_letter_number(friso_charset_t, friso_task_t);
FRISO_API int friso_other_number(friso_charset_t, friso_task_t);
//check if the word is a keep punctuation.
//@Deprecated
//FRISO_API int friso_keep_punctuation( friso_charset_t, friso_task_t );
//check the specified string is numeric string.
FRISO_API int friso_numeric_string( friso_charset_t, char * );
FRISO_API int friso_numeric_string(friso_charset_t, char *);
//check the specified string is a decimal string.
FRISO_API int friso_decimal_string( friso_charset_t, char * );
FRISO_API int friso_decimal_string(friso_charset_t, char *);
//check if the specified char is english uppercase letter.
// included full-width and half-width letters.
FRISO_API int friso_uppercase_letter( friso_charset_t, friso_task_t );
FRISO_API int friso_uppercase_letter(friso_charset_t, friso_task_t);
//en char type.
@ -83,13 +83,13 @@ typedef enum {
* the type will be the constants defined above.
* (include the fullwidth english char.)
*/
FRISO_API friso_enchar_t friso_enchar_type( friso_charset_t, friso_task_t );
FRISO_API friso_enchar_t friso_enchar_type(friso_charset_t, friso_task_t);
/* get the type of the specified en char.
* the type will be the constants defined above.
* (the char should be half-width english char only)
*/
FRISO_API friso_enchar_t get_enchar_type( char );
FRISO_API friso_enchar_t get_enchar_type(char);
/* }}} */
@ -102,76 +102,76 @@ FRISO_API friso_enchar_t get_enchar_type( char );
*
* @return int the bytes of the current readed word.
*/
FRISO_API int utf8_next_word( friso_task_t, uint_t *, fstring );
FRISO_API int utf8_next_word(friso_task_t, uint_t *, fstring);
//get the bytes of a utf-8 char.
FRISO_API int get_utf8_bytes( char );
FRISO_API int get_utf8_bytes(char);
//return the unicode serial number of a given string.
FRISO_API int get_utf8_unicode( const fstring );
FRISO_API int get_utf8_unicode(const fstring);
//convert the unicode serial to a utf-8 string.
FRISO_API int unicode_to_utf8( uint_t, fstring );
FRISO_API int unicode_to_utf8(uint_t, fstring);
//check if the given char is a CJK.
FRISO_API int utf8_cjk_string( uint_t ) ;
FRISO_API int utf8_cjk_string(uint_t) ;
/*check the given char is a Basic Latin letter or not.
* include all the letters and english punctuations.*/
FRISO_API int utf8_halfwidth_en_char( uint_t );
FRISO_API int utf8_halfwidth_en_char(uint_t);
/*
* check the given char is a full-width latin char or not.
* include the full-width arabic numbers and letters,
* but not the full-width punctuations.
*/
FRISO_API int utf8_fullwidth_en_char( uint_t );
FRISO_API int utf8_fullwidth_en_char(uint_t);
//check the given char is a upper case letter or not.
// included all the full-width and half-width letters.
FRISO_API int utf8_uppercase_letter( uint_t );
FRISO_API int utf8_uppercase_letter(uint_t);
//check the given char is a lower case letter or not.
// included all the full-width and half-width letters.
FRISO_API int utf8_lowercase_letter( uint_t );
FRISO_API int utf8_lowercase_letter(uint_t);
//check the given char is a numeric.
// included the full-width and half-width arabic numeric.
FRISO_API int utf8_numeric_letter( uint_t );
FRISO_API int utf8_numeric_letter(uint_t);
/*
* check if the given fstring is make up with numeric chars.
* both full-width,half-width numeric is ok.
*/
FRISO_API int utf8_numeric_string( char * );
FRISO_API int utf8_numeric_string(char *);
FRISO_API int utf8_decimal_string( char * );
FRISO_API int utf8_decimal_string(char *);
//check the given char is a english char.
//(full-width and half-width)
//not the punctuation of course.
FRISO_API int utf8_en_letter( uint_t );
FRISO_API int utf8_en_letter(uint_t);
//check the given char is a whitespace or not.
FRISO_API int utf8_whitespace( uint_t );
FRISO_API int utf8_whitespace(uint_t);
/* check if the given char is a letter number like 'ⅠⅡ'
*/
FRISO_API int utf8_letter_number( uint_t );
FRISO_API int utf8_letter_number(uint_t);
/*
* check if the given char is a other number like ''
*/
FRISO_API int utf8_other_number( uint_t );
FRISO_API int utf8_other_number(uint_t);
//check if the given char is a english punctuation.
FRISO_API int utf8_en_punctuation( uint_t ) ;
FRISO_API int utf8_en_punctuation(uint_t) ;
//check if the given char is a chinese punctuation.
FRISO_API int utf8_cn_punctuation( uint_t u );
FRISO_API int utf8_cn_punctuation(uint_t u);
FRISO_API int is_en_punctuation( friso_charset_t, char );
//#define is_en_punctuation( c ) utf8_en_punctuation((uint_t) c)
FRISO_API int is_en_punctuation(friso_charset_t, char);
//#define is_en_punctuation( c ) utf8_en_punctuation((uint_t) c)
//@Deprecated
//FRISO_API int utf8_keep_punctuation( fstring );
@ -186,67 +186,67 @@ FRISO_API int is_en_punctuation( friso_charset_t, char );
*
* @return int the bytes of the current readed word.
*/
FRISO_API int gbk_next_word( friso_task_t, uint_t *, fstring );
FRISO_API int gbk_next_word(friso_task_t, uint_t *, fstring);
//get the bytes of a gbk char.
FRISO_API int get_gbk_bytes( char );
FRISO_API int get_gbk_bytes(char);
//check if the given char is a gbk char (ANSI string).
FRISO_API int gbk_cn_string( char * ) ;
FRISO_API int gbk_cn_string(char *) ;
/*check if the given char is an ASCII letter
* include all the letters and english punctuations.*/
FRISO_API int gbk_halfwidth_en_char( char );
FRISO_API int gbk_halfwidth_en_char(char);
/*
* check if the given char is a full-width latin char.
* include the full-width arabic numbers and letters,
* but not the full-width punctuations.
*/
FRISO_API int gbk_fullwidth_en_char( char * );
FRISO_API int gbk_fullwidth_en_char(char *);
//check if the given char is a upper case char.
// included all the full-width and half-width letters.
FRISO_API int gbk_uppercase_letter( char * );
FRISO_API int gbk_uppercase_letter(char *);
//check if the given char is a lower case char.
// included all the full-width and half-width letters.
FRISO_API int gbk_lowercase_letter( char * );
FRISO_API int gbk_lowercase_letter(char *);
//check if the given char is a numeric.
// included the full-width and half-width arabic numeric.
FRISO_API int gbk_numeric_letter( char * );
FRISO_API int gbk_numeric_letter(char *);
/*
* check if the given fstring is make up with numeric chars.
* both full-width,half-width numeric is ok.
*/
FRISO_API int gbk_numeric_string( char * );
FRISO_API int gbk_numeric_string(char *);
FRISO_API int gbk_decimal_string( char * );
FRISO_API int gbk_decimal_string(char *);
//check if the given char is a english(ASCII) char.
//(full-width and half-width)
//not the punctuation of course.
FRISO_API int gbk_en_letter( char * );
FRISO_API int gbk_en_letter(char *);
//check the specified char is a whitespace or not.
FRISO_API int gbk_whitespace( char * );
FRISO_API int gbk_whitespace(char *);
/* check if the given char is a letter number like 'ⅠⅡ'
*/
FRISO_API int gbk_letter_number( char * );
FRISO_API int gbk_letter_number(char *);
/*
* check if the given char is a other number like ''
*/
FRISO_API int gbk_other_number( char * );
FRISO_API int gbk_other_number(char *);
//check if the given char is a english punctuation.
FRISO_API int gbk_en_punctuation( char ) ;
FRISO_API int gbk_en_punctuation(char) ;
//check the given char is a chinese punctuation.
FRISO_API int gbk_cn_punctuation( char * );
FRISO_API int gbk_cn_punctuation(char *);
//cause the logic handle is the same as the utf8.
// here invoke the utf8 interface directly.

View File

@ -16,32 +16,30 @@
/* ************************
* mapping function area *
**************************/
/*
 * map a NUL-terminated key to a bucket index in [0, length).
 * multiplicative string hash: h = h * HASH_FACTOR + char for every char
 * of the key, reduced modulo the table length at the end.
 *
 * @param str    the key, dereferenced unconditionally so it must not be NULL
 *               (the callers in this file test for a NULL key first)
 * @param length number of buckets, used as the modulus so it must not be 0
 * @return uint_t the bucket index
 */
__STATIC_API__ uint_t hash(fstring str, uint_t length) {
    //hash code
    uint_t h = 0;

    while(*str != '\0') {
        h = h * HASH_FACTOR + (*str++);
    }

    return (h % length);
}
/*
 * test if an integer is a prime (trial division by odd candidates).
 *
 * @param n the integer to test
 * @return int 1 when n is a prime, 0 otherwise
 */
__STATIC_API__ int is_prime(int n) {
    int j;

    if(n == 2 || n == 3) {
        return 1;
    }

    //0, 1, negative numbers and every even number are not prime.
    if(n < 2 || n % 2 == 0) {
        return 0;
    }

    //the bound has to be inclusive: with 'j * j < n' the squares of odd
    //primes (9, 25, 49, ...) were never divided by their root and were
    //reported as primes. 'j <= n / j' is the same test as 'j * j <= n'
    //without the risk of overflowing the multiplication.
    //n is odd here, so only odd divisors have to be tried.
    for(j = 3; j <= n / j; j += 2) {
        if(n % j == 0) {
            return 0;
        }
    }

    return 1;
}
/*
 * get the first odd number not less than the specified integer
 * that is_prime() accepts.
 * an even n (2 included) is bumped to n + 1 before the search starts.
 *
 * @param n the lower bound of the search
 * @return int the prime found
 */
__STATIC_API__ int next_prime(int n) {
    //only odd candidates are tested.
    if(n % 2 == 0) n++;

    //walk the odd numbers until one of them is accepted.
    for(; ! is_prime(n); n = n + 2) ;

    return n;
}
@ -76,14 +73,13 @@ __STATIC_API__ int next_prime( int n )
/* *********************************
* static hashtable function area. *
***********************************/
__STATIC_API__ hash_entry_t new_hash_entry(
fstring key,
void * value,
hash_entry_t next )
{
hash_entry_t e = ( hash_entry_t )
FRISO_MALLOC( sizeof( friso_hash_entry ) );
if ( e == NULL ) {
__STATIC_API__ hash_entry_t new_hash_entry(
fstring key,
void * value,
hash_entry_t next) {
hash_entry_t e = (hash_entry_t)
FRISO_MALLOC(sizeof(friso_hash_entry));
if(e == NULL) {
___ALLOCATION_ERROR___
}
@ -96,16 +92,15 @@ __STATIC_API__ hash_entry_t new_hash_entry(
}
//create blocks copy of entries.
__STATIC_API__ hash_entry_t * create_hash_entries( uint_t blocks )
{
__STATIC_API__ hash_entry_t * create_hash_entries(uint_t blocks) {
register uint_t t;
hash_entry_t *e = ( hash_entry_t * )
FRISO_CALLOC( sizeof( hash_entry_t ), blocks );
if ( e == NULL ) {
hash_entry_t *e = (hash_entry_t *)
FRISO_CALLOC(sizeof(hash_entry_t), blocks);
if(e == NULL) {
___ALLOCATION_ERROR___
}
for ( t = 0; t < blocks; t++ ) {
for(t = 0; t < blocks; t++) {
e[t] = NULL;
}
@ -113,35 +108,34 @@ __STATIC_API__ hash_entry_t * create_hash_entries( uint_t blocks )
}
/*
 * a static function to do the re-hash work.
 * allocates a bucket array of next_prime(2 * length + 1) slots, relinks
 * every existing entry into it (the entries themselves are reused, not
 * copied), updates length/threshold and frees the old bucket array.
 *
 * @param _hash the hash table to grow
 */
__STATIC_API__ void rebuild_hash(friso_hash_t _hash) {
    //printf("rehashed.\n");
    //find the next prime as the length of the hashtable.
    uint_t t, bucket;
    uint_t length = next_prime(_hash->length * 2 + 1);
    hash_entry_t e, next;
    hash_entry_t *_src = _hash->table;
    hash_entry_t *table = create_hash_entries(length);

    //relink the nodes into the new bucket array
    for(t = 0; t < _hash->length; t++) {
        for(e = _src[t]; e != NULL; e = next) {
            //remember the successor before the link is overwritten.
            next = e->_next;
            //a NULL key lives in bucket 0, the same rule the put/get/remove
            //functions apply; hash() itself would dereference the NULL key.
            bucket = (e->_key == NULL) ? 0 : hash(e->_key, length);
            //push the node at the head of its new chain.
            e->_next = table[bucket];
            table[bucket] = e;
        }
    }

    _hash->table = table;
    _hash->length = length;
    _hash->threshold = (uint_t)(_hash->length * _hash->factor);

    //free the old hash_entry_t blocks allocations.
    FRISO_FREE(_src);
}
/* ********************************
@ -149,10 +143,9 @@ __STATIC_API__ void rebuild_hash( friso_hash_t _hash )
* ********************************/
//create a new hash table.
FRISO_API friso_hash_t new_hash_table( void )
{
friso_hash_t _hash = ( friso_hash_t ) FRISO_MALLOC( sizeof ( friso_hash_cdt ) );
if ( _hash == NULL ) {
FRISO_API friso_hash_t new_hash_table(void) {
friso_hash_t _hash = (friso_hash_t) FRISO_MALLOC(sizeof(friso_hash_cdt));
if(_hash == NULL) {
___ALLOCATION_ERROR___
}
@ -160,51 +153,49 @@ FRISO_API friso_hash_t new_hash_table( void )
_hash->length = DEFAULT_LENGTH;
_hash->size = 0;
_hash->factor = DEFAULT_FACTOR;
_hash->threshold = ( uint_t ) ( _hash->length * _hash->factor );
_hash->table = create_hash_entries( _hash->length );
_hash->threshold = (uint_t)(_hash->length * _hash->factor);
_hash->table = create_hash_entries(_hash->length);
return _hash;
}
/*
 * free the specified hash table: every entry, the bucket array
 * and the table struct itself.
 *
 * @param _hash       the hash table to free, NULL is ignored
 * @param fentry_func optional callback invoked with each entry right before
 *                    the entry is freed (the place to release the key/value
 *                    the entry refers to), pass NULL to skip it
 */
FRISO_API void free_hash_table(
    friso_hash_t _hash,
    fhash_callback_fn_t fentry_func) {
    uint_t j;
    hash_entry_t e, n;

    //nothing to release.
    if(_hash == NULL) {
        return;
    }

    for(j = 0; j < _hash->length; j++) {
        for(e = _hash->table[j]; e != NULL; e = n) {
            //fetch the successor first, e is gone after FRISO_FREE.
            n = e->_next;
            if(fentry_func != NULL) fentry_func(e);
            FRISO_FREE(e);
        }
    }

    //free the pointer array block
    //( sizeof( hash_entry_t ) * _hash->length continuous bytes ).
    FRISO_FREE(_hash->table);
    FRISO_FREE(_hash);
}
//put a new mapping insite.
//the value cannot be NULL.
FRISO_API void *hash_put_mapping(
friso_hash_t _hash,
fstring key,
void * value )
{
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
hash_entry_t e = *( _hash->table + bucket );
FRISO_API void *hash_put_mapping(
friso_hash_t _hash,
fstring key,
void * value) {
uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
hash_entry_t e = *(_hash->table + bucket);
void *oval = NULL;
//check the given key is already exists or not.
for ( ; e != NULL; e = e->_next ) {
if ( key == e->_key
|| ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 ) ) {
for(; e != NULL; e = e->_next) {
if(key == e->_key
|| (key != NULL && e->_key != NULL
&& strcmp(key, e->_key) == 0)) {
oval = e->_val; //bak the old value
e->_key = key;
e->_val = value;
@ -213,29 +204,28 @@ FRISO_API void *hash_put_mapping(
}
//put a new mapping into the hashtable.
_hash->table[bucket] = new_hash_entry( key, value, _hash->table[bucket] );
_hash->table[bucket] = new_hash_entry(key, value, _hash->table[bucket]);
_hash->size++;
//check the condition to rebuild the hashtable.
if ( _hash->size >= _hash->threshold ) {
rebuild_hash( _hash );
if(_hash->size >= _hash->threshold) {
rebuild_hash(_hash);
}
return oval;
}
//check the existence of the mapping associated with the given key.
FRISO_API int hash_exist_mapping(
friso_hash_t _hash, fstring key )
{
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
FRISO_API int hash_exist_mapping(
friso_hash_t _hash, fstring key) {
uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
hash_entry_t e;
for ( e = *( _hash->table + bucket );
e != NULL; e = e->_next ) {
if ( key == e->_key
|| ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 )) {
for(e = *(_hash->table + bucket);
e != NULL; e = e->_next) {
if(key == e->_key
|| (key != NULL && e->_key != NULL
&& strcmp(key, e->_key) == 0)) {
return 1;
}
}
@ -244,16 +234,15 @@ FRISO_API int hash_exist_mapping(
}
//get the value associated with the given key.
FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
{
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
FRISO_API void *hash_get_value(friso_hash_t _hash, fstring key) {
uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
hash_entry_t e;
for ( e = *( _hash->table + bucket );
e != NULL; e = e->_next ) {
if ( key == e->_key
|| ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 )) {
for(e = *(_hash->table + bucket);
e != NULL; e = e->_next) {
if(key == e->_key
|| (key != NULL && e->_key != NULL
&& strcmp(key, e->_key) == 0)) {
return e->_val;
}
}
@ -262,21 +251,20 @@ FRISO_API void *hash_get_value( friso_hash_t _hash, fstring key )
}
//remove the mapping associated with the given key.
FRISO_API hash_entry_t hash_remove_mapping(
friso_hash_t _hash, fstring key )
{
uint_t bucket = ( key == NULL ) ? 0 : hash( key, _hash->length );
FRISO_API hash_entry_t hash_remove_mapping(
friso_hash_t _hash, fstring key) {
uint_t bucket = (key == NULL) ? 0 : hash(key, _hash->length);
hash_entry_t e, prev = NULL;
hash_entry_t b;
for ( e = *( _hash->table + bucket );
e != NULL; prev = e, e = e->_next ) {
if ( key == e->_key
|| ( key != NULL && e->_key != NULL
&& strcmp( key, e->_key ) == 0 ) ) {
for(e = *(_hash->table + bucket);
e != NULL; prev = e, e = e->_next) {
if(key == e->_key
|| (key != NULL && e->_key != NULL
&& strcmp(key, e->_key) == 0)) {
b = e;
//the node located at *( htable->table + bucket )
if ( prev == NULL ) {
if(prev == NULL) {
_hash->table[bucket] = e->_next;
} else {
prev->_next = e->_next;

View File

@ -1,7 +1,7 @@
/*
* friso lexicon functions implementation.
* used to deal with the friso lexicon, like: load,remove,match...
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
@ -15,16 +15,15 @@
#define __FRISO_LEX_IFILE__ "friso.lex.ini"
//create a new lexicon
FRISO_API friso_dic_t friso_dic_new()
{
FRISO_API friso_dic_t friso_dic_new() {
register uint_t t;
friso_dic_t dic = ( friso_dic_t ) FRISO_CALLOC(
sizeof( friso_hash_t ), __FRISO_LEXICON_LENGTH__ );
if ( dic == NULL ) {
friso_dic_t dic = (friso_dic_t) FRISO_CALLOC(
sizeof(friso_hash_t), __FRISO_LEXICON_LENGTH__);
if(dic == NULL) {
___ALLOCATION_ERROR___
}
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
dic[t] = new_hash_table();
}
@ -33,24 +32,23 @@ FRISO_API friso_dic_t friso_dic_new()
/**
* default callback function to invoke
* when free the friso dictionary .
* when free the friso dictionary .
*
* @date 2013-06-12
*/
__STATIC_API__ void default_fdic_callback( hash_entry_t e )
{
__STATIC_API__ void default_fdic_callback(hash_entry_t e) {
register uint_t i;
friso_array_t syn;
lex_entry_t lex = ( lex_entry_t ) e->_val;
lex_entry_t lex = (lex_entry_t) e->_val;
//free the lex->word
FRISO_FREE( lex->word );
FRISO_FREE(lex->word);
//free the lex->syn if it is not NULL
if ( lex->syn != NULL ) {
if(lex->syn != NULL) {
syn = lex->syn;
for ( i = 0; i < syn->length; i++ ) {
FRISO_FREE( syn->items[i] );
for(i = 0; i < syn->length; i++) {
FRISO_FREE(syn->items[i]);
}
free_array_list( syn );
free_array_list(syn);
}
//free the e->_val
@ -58,29 +56,27 @@ __STATIC_API__ void default_fdic_callback( hash_entry_t e )
FRISO_FREE(lex);
}
/*
 * free the specified dictionary: each of its __FRISO_LEXICON_LENGTH__
 * hash tables (their entries are released through default_fdic_callback)
 * and then the table array itself.
 *
 * @param dic the dictionary to free, NULL is ignored
 */
FRISO_API void friso_dic_free(friso_dic_t dic) {
    uint_t t;

    //nothing to release.
    if(dic == NULL) {
        return;
    }

    for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
        //free the hash table
        free_hash_table(dic[t], default_fdic_callback);
    }

    FRISO_FREE(dic);
}
//create a new lexicon entry
FRISO_API lex_entry_t new_lex_entry(
fstring word,
friso_array_t syn,
uint_t fre,
uint_t length,
uint_t type )
{
lex_entry_t e = ( lex_entry_t )
FRISO_MALLOC( sizeof( lex_entry_cdt ) );
if ( e == NULL ) {
FRISO_API lex_entry_t new_lex_entry(
fstring word,
friso_array_t syn,
uint_t fre,
uint_t length,
uint_t type) {
lex_entry_t e = (lex_entry_t)
FRISO_MALLOC(sizeof(lex_entry_cdt));
if(e == NULL) {
___ALLOCATION_ERROR___
}
@ -107,20 +103,19 @@ FRISO_API lex_entry_t new_lex_entry(
* 3. free its pos. (friso_array_t)
* 4. free the lex_entry_t.
*/
FRISO_API void free_lex_entry_full( lex_entry_t e )
{
FRISO_API void free_lex_entry_full(lex_entry_t e) {
register uint_t i;
friso_array_t syn;
//free the lex->word
FRISO_FREE( e->word );
FRISO_FREE(e->word);
//free the lex->syn if it is not NULL
if ( e->syn != NULL ) {
if(e->syn != NULL) {
syn = e->syn;
for ( i = 0; i < syn->length; i++ ) {
FRISO_FREE( syn->items[i] );
for(i = 0; i < syn->length; i++) {
FRISO_FREE(syn->items[i]);
}
free_array_list( syn );
free_array_list(syn);
}
//free the e->_val
@ -128,8 +123,7 @@ FRISO_API void free_lex_entry_full( lex_entry_t e )
FRISO_FREE(e);
}
FRISO_API void free_lex_entry( lex_entry_t e )
{
FRISO_API void free_lex_entry(lex_entry_t e) {
//if ( e->syn != NULL ) {
// if ( flag == 1 ) free_array_list( e->syn);
// else free_array_list( e->syn );
@ -140,37 +134,35 @@ FRISO_API void free_lex_entry( lex_entry_t e )
/*
 * add a new entry (with a frequency of 0) to the dictionary.
 * the entry keeps the given word and syn pointers as they are (no copy),
 * and the free routines of this file release them with FRISO_FREE /
 * free_array_list, so both have to be heap allocations that the caller
 * hands over. an entry already stored under the same word is replaced
 * and released together with its own word and synonyms.
 *
 * @param dic  the dictionary
 * @param lex  the lexicon type, out of range values are ignored
 * @param word the word, a NULL word is ignored
 * @param syn  the synonyms of the word or NULL
 */
FRISO_API void friso_dic_add(
    friso_dic_t dic,
    friso_lex_t lex,
    fstring word,
    friso_array_t syn) {
    void *olex = NULL;

    //strlen( NULL ) is undefined, so a NULL word is dropped up front
    //together with the invalid lexicon types.
    if(word == NULL || !(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__)) {
        return;
    }

    //printf("lex=%d, word=%s, syn=%s\n", lex, word, syn);
    olex = hash_put_mapping(dic[lex], word,
                            new_lex_entry(word, syn, 0,
                                          (uint_t) strlen(word), (uint_t) lex));

    //the mapping existed already: release the entry that was replaced.
    if(olex != NULL) {
        free_lex_entry_full((lex_entry_t)olex);
    }
}
FRISO_API void friso_dic_add_with_fre(
friso_dic_t dic,
friso_lex_t lex,
fstring word,
friso_array_t syn,
uint_t frequency )
{
FRISO_API void friso_dic_add_with_fre(
friso_dic_t dic,
friso_lex_t lex,
fstring word,
friso_array_t syn,
uint_t frequency) {
void *olex = NULL;
if ( lex >= 0 && lex < __FRISO_LEXICON_LENGTH__ ) {
olex = hash_put_mapping( dic[lex], word,
new_lex_entry( word, syn, frequency,
( uint_t ) strlen(word), ( uint_t ) lex ) );
if ( olex != NULL ) {
if(lex >= 0 && lex < __FRISO_LEXICON_LENGTH__) {
olex = hash_put_mapping(dic[lex], word,
new_lex_entry(word, syn, frequency,
(uint_t) strlen(word), (uint_t) lex));
if(olex != NULL) {
free_lex_entry_full((lex_entry_t)olex);
}
}
@ -179,39 +171,37 @@ FRISO_API void friso_dic_add_with_fre(
/*
 * read a line from a specified stream.
 * the newline will be cleared (a '\r' in front of it is kept).
 *
 * NOTE: there is no bounds check, the whole line plus the terminating
 * '\0' is written to __dst whatever its size is, so the buffer has to be
 * large enough for the longest line of the stream.
 *
 * @param __dst   the buffer to store the line
 * @param _stream the stream to read from
 * @return fstring __dst, or NULL when the end of the stream
 *         was reached before any char was read
 *
 * @date 2012-11-24
 */
FRISO_API fstring file_get_line(fstring __dst, FILE * _stream) {
    int c;
    fstring cs = __dst;

    while((c = fgetc(_stream)) != EOF) {
        if(c == '\n') break;
        *cs++ = (char) c;
    }
    *cs = '\0';

    //an empty line still returns __dst (as ""), only EOF with nothing read is NULL.
    return (c == EOF && cs == __dst) ? NULL : __dst;
}
/*
* static function to copy a string.
* static function to copy a string.
*/
///instead of memcpy
__STATIC_API__ fstring string_copy(
fstring _src,
fstring __dst,
uint_t blocks )
{
__STATIC_API__ fstring string_copy(
fstring _src,
fstring __dst,
uint_t blocks) {
register fstring __src = _src;
register uint_t t;
for ( t = 0; t < blocks; t++ ) {
if ( *__src == '\0' ) break;
for(t = 0; t < blocks; t++) {
if(*__src == '\0') break;
__dst[t] = *__src++;
}
__dst[t] = '\0';
@ -220,24 +210,23 @@ __STATIC_API__ fstring string_copy(
}
/**
* make a heap allocation, and copy the
* source fstring to the new allocation, and
* you should free it after use it .
* make a heap allocation, and copy the
* source fstring to the new allocation, and
* you should free it after use it .
*
* @param _src source fstring
* @param blocks number of bytes to copy
*/
__STATIC_API__ fstring string_copy_heap(
fstring _src, uint_t blocks )
{
__STATIC_API__ fstring string_copy_heap(
fstring _src, uint_t blocks) {
register uint_t t;
fstring str = ( fstring ) FRISO_MALLOC( blocks + 1 );
if ( str == NULL ) {
fstring str = (fstring) FRISO_MALLOC(blocks + 1);
if(str == NULL) {
___ALLOCATION_ERROR___;
}
for ( t = 0; t < blocks; t++ ) {
for(t = 0; t < blocks; t++) {
//if ( *_src == '\0' ) break;
str[t] = *_src++;
}
@ -249,15 +238,14 @@ __STATIC_API__ fstring string_copy_heap(
/*
* find the position of the first occurrence of the given char.
* address of the char in the fstring will be return .
* if not found NULL will be return .
* if not found NULL will be return .
*/
__STATIC_API__ fstring indexOf( fstring __str, char delimiter )
{
__STATIC_API__ fstring indexOf(fstring __str, char delimiter) {
uint_t i, __length__;
__length__ = strlen( __str );
for ( i = 0; i < __length__; i++ ) {
if ( __str[i] == delimiter ) {
__length__ = strlen(__str);
for(i = 0; i < __length__; i++) {
if(__str[i] == delimiter) {
return __str + i;
}
}
@ -266,20 +254,19 @@ __STATIC_API__ fstring indexOf( fstring __str, char delimiter )
}
/**
* load all the valid words from a specified lexicon file .
* load all the valid words from a specified lexicon file .
*
* @param dic friso dictionary instance (A hash array)
* @param lex the lexicon type
* @param lex_file the path of the lexicon file
* @param length the maximum length of the word item
*/
FRISO_API void friso_dic_load(
friso_t friso,
friso_config_t config,
friso_lex_t lex,
fstring lex_file,
uint_t length )
{
FRISO_API void friso_dic_load(
friso_t friso,
friso_config_t config,
friso_lex_t lex,
fstring lex_file,
uint_t length) {
FILE * _stream;
char __char[1024], _buffer[512];
@ -292,35 +279,35 @@ FRISO_API void friso_dic_load(
friso_array_t sywords;
uint_t _fre;
if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL ) {
while ( ( _line = file_get_line( __char, _stream ) ) != NULL ) {
if((_stream = fopen(lex_file, "rb")) != NULL) {
while((_line = file_get_line(__char, _stream)) != NULL) {
//clear up the notes
//make sure the length of the line is greater than 1.
//like the single '#' mark in stopwords dictionary.
if ( _line[0] == '#' && strlen(_line) > 1 ) continue;
if(_line[0] == '#' && strlen(_line) > 1) continue;
//handle the stopwords.
if ( lex == __LEX_STOPWORDS__ ) {
if(lex == __LEX_STOPWORDS__) {
//clean the chinese words that its length is greater than max length.
if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue;
friso_dic_add( friso->dic, __LEX_STOPWORDS__,
string_copy_heap( _line, strlen(_line) ), NULL );
if(((int)_line[0]) < 0 && strlen(_line) > length) continue;
friso_dic_add(friso->dic, __LEX_STOPWORDS__,
string_copy_heap(_line, strlen(_line)), NULL);
continue;
}
//split the fstring with '/'.
string_split_reset( &sse, "/", _line);
if ( string_split_next( &sse, _buffer ) == NULL ) {
string_split_reset(&sse, "/", _line);
if(string_split_next(&sse, _buffer) == NULL) {
continue;
}
//1. get the word.
_word = string_copy_heap( _buffer, strlen(_buffer) );
_word = string_copy_heap(_buffer, strlen(_buffer));
if ( string_split_next( &sse, _buffer ) == NULL ) {
//normal lexicon type,
if(string_split_next(&sse, _buffer) == NULL) {
//normal lexicon type,
//add them to the dictionary directly
friso_dic_add( friso->dic, lex, _word, NULL );
friso_dic_add(friso->dic, lex, _word, NULL);
continue;
}
@ -330,87 +317,86 @@ FRISO_API void friso_dic_load(
* but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__
* and __LEX_CEM_WORDS__.
*/
if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ )
&& strlen( _word ) > length ) {
if(!(lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__)
&& strlen(_word) > length) {
FRISO_FREE(_word);
continue;
}
//2. get the synonyms words.
_syn = NULL;
if ( strcmp( _buffer, "null" ) != 0 ) {
_syn = string_copy( _buffer, _sbuffer, strlen(_buffer) );
if(strcmp(_buffer, "null") != 0) {
_syn = string_copy(_buffer, _sbuffer, strlen(_buffer));
}
//3. get the word frequency if it available.
_fre = 0;
if ( string_split_next( &sse, _buffer ) != NULL ) {
_fre = atoi( _buffer );
if(string_split_next(&sse, _buffer) != NULL) {
_fre = atoi(_buffer);
}
/**
* Here:
* split the synonyms words with mark ","
* split the synonyms words with mark ","
* and put them in a array list if the synonyms is not NULL
*/
sywords = NULL;
if ( config->add_syn && _syn != NULL ) {
string_split_reset( &sse, ",", _sbuffer );
if(config->add_syn && _syn != NULL) {
string_split_reset(&sse, ",", _sbuffer);
sywords = new_array_list_with_opacity(5);
while ( string_split_next( &sse, _buffer ) != NULL ) {
if ( strlen(_buffer) > length ) continue;
array_list_add( sywords,
string_copy_heap(_buffer, strlen(_buffer)) );
while(string_split_next(&sse, _buffer) != NULL) {
if(strlen(_buffer) > length) continue;
array_list_add(sywords,
string_copy_heap(_buffer, strlen(_buffer)));
}
sywords = array_list_trim( sywords );
sywords = array_list_trim(sywords);
}
//4. add the word item
friso_dic_add_with_fre(
friso->dic, lex, _word, sywords, _fre );
}
friso_dic_add_with_fre(
friso->dic, lex, _word, sywords, _fre);
}
fclose( _stream );
fclose(_stream);
} else {
fprintf(stderr, "Warning: Fail to open lexicon file %s\n", lex_file);
fprintf(stderr, "Warning: Without lexicon file, segment results will not correct \n");
}
}
}
/**
* get the lexicon type index with the specified
* type keywords .
* get the lexicon type index with the specified
* type keywords .
*
* @see friso.h#friso_lex_t
* @param _key
* @return int
*/
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
{
if ( strcmp( _key, "__LEX_CJK_WORDS__" ) == 0 ) {
__STATIC_API__ friso_lex_t get_lexicon_type_with_constant(fstring _key) {
if(strcmp(_key, "__LEX_CJK_WORDS__") == 0) {
return __LEX_CJK_WORDS__;
} else if ( strcmp( _key, "__LEX_CJK_UNITS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CJK_UNITS__") == 0) {
return __LEX_CJK_UNITS__;
} else if ( strcmp( _key, "__LEX_ECM_WORDS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_ECM_WORDS__") == 0) {
return __LEX_ECM_WORDS__;
} else if ( strcmp( _key, "__LEX_CEM_WORDS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CEM_WORDS__") == 0) {
return __LEX_CEM_WORDS__;
} else if ( strcmp( _key, "__LEX_CN_LNAME__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CN_LNAME__") == 0) {
return __LEX_CN_LNAME__;
} else if ( strcmp( _key, "__LEX_CN_SNAME__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CN_SNAME__") == 0) {
return __LEX_CN_SNAME__;
} else if ( strcmp( _key, "__LEX_CN_DNAME1__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CN_DNAME1__") == 0) {
return __LEX_CN_DNAME1__;
} else if ( strcmp( _key, "__LEX_CN_DNAME2__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CN_DNAME2__") == 0) {
return __LEX_CN_DNAME2__;
} else if ( strcmp( _key, "__LEX_CN_LNA__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_CN_LNA__") == 0) {
return __LEX_CN_LNA__;
} else if ( strcmp( _key, "__LEX_STOPWORDS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_STOPWORDS__") == 0) {
return __LEX_STOPWORDS__;
} else if ( strcmp( _key, "__LEX_ENPUN_WORDS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_ENPUN_WORDS__") == 0) {
return __LEX_ENPUN_WORDS__;
} else if ( strcmp( _key, "__LEX_EN_WORDS__" ) == 0 ) {
} else if(strcmp(_key, "__LEX_EN_WORDS__") == 0) {
return __LEX_EN_WORDS__;
}
@ -424,14 +410,13 @@ __STATIC_API__ friso_lex_t get_lexicon_type_with_constant( fstring _key )
* @param friso friso instance
* @param config friso_config instance
* @param _path dictionary directory
* @param _limits words length limit
* @param _limits words length limit
*/
FRISO_API void friso_dic_load_from_ifile(
friso_t friso,
friso_config_t config,
fstring _path,
uint_t _limits )
{
FRISO_API void friso_dic_load_from_ifile(
friso_t friso,
friso_config_t config,
fstring _path,
uint_t _limits) {
//1.parse the configuration file.
FILE *__stream;
@ -443,116 +428,112 @@ FRISO_API void friso_dic_load_from_ifile(
//get the lexicon configuration file path
sb = new_string_buffer();
string_buffer_append( sb, _path );
string_buffer_append( sb, __FRISO_LEX_IFILE__ );
string_buffer_append(sb, _path);
string_buffer_append(sb, __FRISO_LEX_IFILE__);
//printf("%s\n", sb->buffer);
if ( ( __stream = fopen( sb->buffer, "rb" ) ) != NULL ) {
while ( ( __line__ =
file_get_line( __chars__, __stream ) ) != NULL ) {
if((__stream = fopen(sb->buffer, "rb")) != NULL) {
while((__line__ =
file_get_line(__chars__, __stream)) != NULL) {
//comment filter.
if ( __line__[0] == '#' ) continue;
if ( __line__[0] == '\0' ) continue;
if(__line__[0] == '#') continue;
if(__line__[0] == '\0') continue;
__length__ = strlen( __line__ );
__length__ = strlen(__line__);
//item start
if ( __line__[ __length__ - 1 ] == '[' ) {
if(__line__[ __length__ - 1 ] == '[') {
//get the type key
for ( i = 0; i < __length__
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
for ( t = 0; i < __length__; i++,t++ ) {
if ( __line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ':' ) break;
for(i = 0; i < __length__
&& (__line__[i] == ' ' || __line__[i] == '\t'); i++);
for(t = 0; i < __length__; i++, t++) {
if(__line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ':') break;
__key__[t] = __line__[i];
}
__key__[t] = '\0';
//get the lexicon type
lex_t = get_lexicon_type_with_constant(__key__);
if ( lex_t == -1 ) continue;
if(lex_t == -1) continue;
//printf("key=%s, type=%d\n", __key__, lex_t );
while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL ) {
while((__line__ = file_get_line(__chars__, __stream)) != NULL) {
//comments filter.
if ( __line__[0] == '#' ) continue;
if ( __line__[0] == '\0' ) continue;
if(__line__[0] == '#') continue;
if(__line__[0] == '\0') continue;
__length__ = strlen( __line__ );
if ( __line__[ __length__ - 1 ] == ']' ) break;
__length__ = strlen(__line__);
if(__line__[ __length__ - 1 ] == ']') break;
for ( i = 0; i < __length__
&& ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ );
for ( t = 0; i < __length__; i++,t++ ) {
if ( __line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ';' ) break;
__key__[t] = __line__[i];
for(i = 0; i < __length__
&& (__line__[i] == ' ' || __line__[i] == '\t'); i++);
for(t = 0; i < __length__; i++, t++) {
if(__line__[i] == ' '
|| __line__[i] == '\t' || __line__[i] == ';') break;
__key__[t] = __line__[i];
}
__key__[t] = '\0';
//load the lexicon item from the lexicon file.
string_buffer_clear( sb );
string_buffer_append( sb, _path );
string_buffer_append( sb, __key__ );
string_buffer_clear(sb);
string_buffer_append(sb, _path);
string_buffer_append(sb, __key__);
//printf("key=%s, type=%d\n", __key__, lex_t);
friso_dic_load( friso, config, lex_t, sb->buffer, _limits );
friso_dic_load(friso, config, lex_t, sb->buffer, _limits);
}
}
}
} //end while
fclose( __stream );
fclose(__stream);
} else {
fprintf(stderr, "Warning: Fail to open the lexicon configuration file %s\n", sb->buffer);
fprintf(stderr, "Warning: Without lexicon file, segment results will not correct \n");
}
free_string_buffer(sb);
free_string_buffer(sb);
}
/*
 * check whether `word` is present in the lexicon of type `lex`.
 *
 * @param dic   dictionary, indexed by lexicon type
 * @param lex   lexicon type to look in
 * @param word  key to look up
 * @return the result of hash_exist_mapping() for that lexicon, or 0
 *         when `lex` is outside [0, __FRISO_LEXICON_LENGTH__).
 */
FRISO_API int friso_dic_match(
    friso_dic_t dic,
    friso_lex_t lex,
    fstring word) {
    //an out-of-range lexicon type never matches.
    if(lex < 0 || lex >= __FRISO_LEXICON_LENGTH__) {
        return 0;
    }

    return hash_exist_mapping(dic[lex], word);
}
/*
 * fetch the lex_entry_t stored for `word` in the lexicon of type `lex`.
 *
 * @param dic   dictionary, indexed by lexicon type
 * @param lex   lexicon type to look in
 * @param word  key to look up
 * @return the value handed back by hash_get_value(), or NULL when
 *         `lex` is outside [0, __FRISO_LEXICON_LENGTH__).
 */
FRISO_API lex_entry_t friso_dic_get(
    friso_dic_t dic,
    friso_lex_t lex,
    fstring word) {
    //reject lexicon types that do not index into the dictionary.
    if(lex < 0 || lex >= __FRISO_LEXICON_LENGTH__) {
        return NULL;
    }

    return (lex_entry_t) hash_get_value(dic[lex], word);
}
/*
 * report the size of one lexicon of the dictionary.
 *
 * @param dic   dictionary, indexed by lexicon type
 * @param lex   lexicon type to measure
 * @return hash_get_size() of that lexicon, or 0 when `lex` is outside
 *         [0, __FRISO_LEXICON_LENGTH__).
 */
FRISO_API uint_t friso_spec_dic_size(
    friso_dic_t dic,
    friso_lex_t lex) {
    if(lex < 0 || lex >= __FRISO_LEXICON_LENGTH__) {
        return 0;
    }

    return hash_get_size(dic[lex]);
}
//get size of the whole dictionary.
FRISO_API uint_t friso_all_dic_size(
friso_dic_t dic )
{
FRISO_API uint_t friso_all_dic_size(
friso_dic_t dic) {
register uint_t size = 0, t;
for ( t = 0; t < __FRISO_LEXICON_LENGTH__; t++ ) {
size += hash_get_size( dic[t] );
for(t = 0; t < __FRISO_LEXICON_LENGTH__; t++) {
size += hash_get_size(dic[t]);
}
return size;

View File

@ -10,14 +10,13 @@
#include <stdlib.h>
//create a new link list node.
__STATIC_API__ link_node_t new_node_entry(
void * value,
link_node_t prev,
link_node_t next )
{
link_node_t node = ( link_node_t )
FRISO_MALLOC( sizeof( link_node_entry ) );
if ( node == NULL ) {
__STATIC_API__ link_node_t new_node_entry(
void * value,
link_node_t prev,
link_node_t next) {
link_node_t node = (link_node_t)
FRISO_MALLOC(sizeof(link_node_entry));
if(node == NULL) {
___ALLOCATION_ERROR___
}
@ -29,17 +28,16 @@ __STATIC_API__ link_node_t new_node_entry(
}
//create a new link list
FRISO_API friso_link_t new_link_list( void )
{
friso_link_t e = ( friso_link_t )
FRISO_MALLOC( sizeof( friso_link_entry ) );
if ( e == NULL ) {
FRISO_API friso_link_t new_link_list(void) {
friso_link_t e = (friso_link_t)
FRISO_MALLOC(sizeof(friso_link_entry));
if(e == NULL) {
___ALLOCATION_ERROR___
}
//initialize the entry
e->head = new_node_entry( NULL, NULL, NULL );
e->tail = new_node_entry( NULL, e->head, NULL );
e->head = new_node_entry(NULL, NULL, NULL);
e->tail = new_node_entry(NULL, e->head, NULL);
e->head->next = e->tail;
e->size = 0;
@ -47,28 +45,25 @@ FRISO_API friso_link_t new_link_list( void )
}
//release every node reachable from link->head, then the list entry itself.
//the values held by the nodes are not freed here.
FRISO_API void free_link_list(friso_link_t link) {
    link_node_t cur = link->head;

    while(cur != NULL) {
        //remember the successor before the current node goes away.
        link_node_t following = cur->next;
        FRISO_FREE(cur);
        cur = following;
    }

    FRISO_FREE(link);
}
//clear all nodes in the link list.
FRISO_API friso_link_t link_list_clear(
friso_link_t link )
{
FRISO_API friso_link_t link_list_clear(
friso_link_t link) {
link_node_t node, next;
//free all the middle nodes.
for ( node = link->head->next; node != link->tail; )
{
for(node = link->head->next; node != link->tail;) {
next = node->next;
FRISO_FREE( node );
FRISO_FREE(node);
node = next;
}
@ -94,22 +89,20 @@ FRISO_API friso_link_t link_list_clear(
* find the node at a specified position.
* static
*/
__STATIC_API__ link_node_t get_node(
friso_link_t link, uint_t idx )
{
__STATIC_API__ link_node_t get_node(
friso_link_t link, uint_t idx) {
link_node_t p = NULL;
register uint_t t;
if ( idx >= 0 && idx < link->size )
{
if ( idx < link->size / 2 ) { //find from the head.
if(idx >= 0 && idx < link->size) {
if(idx < link->size / 2) { //find from the head.
p = link->head;
for ( t = 0; t <= idx; t++ )
p = p->next;
for(t = 0; t <= idx; t++)
p = p->next;
} else { //find from the tail.
p = link->tail;
for ( t = link->size; t > idx; t-- )
p = p->prev;
for(t = link->size; t > idx; t--)
p = p->prev;
}
}
@ -120,10 +113,10 @@ __STATIC_API__ link_node_t get_node(
* insert a node before the given node.
* static
*/
//__STATIC_API__ void insert_before(
// friso_link_t link,
// link_node_t node,
// void * value )
//__STATIC_API__ void insert_before(
// friso_link_t link,
// link_node_t node,
// void * value )
//{
// link_node_t e = new_node_entry( value, node->prev, node );
// e->prev->next = e;
@ -147,48 +140,44 @@ __STATIC_API__ link_node_t get_node(
*
* @return the value of the removed node.
*/
__STATIC_API__ void * remove_node(
    friso_link_t link, link_node_t node) {
    link_node_t before = node->prev;
    link_node_t after = node->next;
    void * payload = node->value;

    //splice the node out of the chain and account for it.
    before->next = after;
    after->prev = before;
    link->size--;

    //only the node is released, its value goes back to the caller.
    FRISO_FREE(node);

    return payload;
}
//append `value` to the list: the new node goes right in front of link->tail.
FRISO_API void link_list_add(
    friso_link_t link, void * value) {
    insert_before(link, link->tail, value);
}
//insert `value` in front of the node currently found at position `idx`.
//nothing is inserted when get_node() yields no node for `idx`.
FRISO_API void link_list_insert_before(
    friso_link_t link, uint_t idx, void * value) {
    link_node_t target = get_node(link, idx);

    if(target == NULL) {
        return;
    }

    insert_before(link, target, value);
}
/*
* get the value with the specified node.
*
*
* @return the value of the node.
*/
FRISO_API void * link_list_get(
friso_link_t link, uint_t idx )
{
link_node_t node = get_node( link, idx );
if ( node != NULL ) {
FRISO_API void * link_list_get(
friso_link_t link, uint_t idx) {
link_node_t node = get_node(link, idx);
if(node != NULL) {
return node->value;
}
return NULL;
@ -198,17 +187,16 @@ FRISO_API void * link_list_get(
* set the value of the node that located in the specified position.
* we didn't free the allocation of the old value, we return it to you.
* free it yourself when it is necessary.
*
*
* @return the old value.
*/
FRISO_API void *link_list_set(
friso_link_t link,
uint_t idx, void * value )
{
link_node_t node = get_node( link, idx );
FRISO_API void *link_list_set(
friso_link_t link,
uint_t idx, void * value) {
link_node_t node = get_node(link, idx);
void * _value = NULL;
if ( node != NULL ) {
if(node != NULL) {
_value = node->value;
node->value = value;
}
@ -222,14 +210,13 @@ FRISO_API void *link_list_set(
* @see remove_node
* @return the value of the node removed.
*/
FRISO_API void *link_list_remove(
friso_link_t link, uint_t idx )
{
link_node_t node = get_node( link, idx );
FRISO_API void *link_list_remove(
friso_link_t link, uint_t idx) {
link_node_t node = get_node(link, idx);
if ( node != NULL ) {
if(node != NULL) {
//printf("idx=%d, node->value=%s\n", idx, (string) node->value );
return remove_node( link, node );
return remove_node(link, node);
}
return NULL;
@ -237,48 +224,43 @@ FRISO_API void *link_list_remove(
/*
* remove the given node from the given link list.
*
*
* @see remove_node.
* @return the value of the node removed.
*/
FRISO_API void *link_list_remove_node(
    friso_link_t link,
    link_node_t node) {
    //public entry point, the unlinking itself lives in remove_node().
    void * removed = remove_node(link, node);
    return removed;
}
//detach the node that follows the head and hand back its value.
//a list whose size is not positive yields NULL.
FRISO_API void *link_list_remove_first(
    friso_link_t link) {
    if(link->size <= 0) {
        return NULL;
    }

    return remove_node(link, link->head->next);
}
//detach the node that precedes the tail and hand back its value.
//a list whose size is not positive yields NULL.
FRISO_API void *link_list_remove_last(
    friso_link_t link) {
    if(link->size <= 0) {
        return NULL;
    }

    return remove_node(link, link->tail->prev);
}
//append `value` as the last element: the new node sits right in front of link->tail.
FRISO_API void link_list_add_last(
    friso_link_t link,
    void *value) {
    insert_before(link, link->tail, value);
}
//prepend `value`: the new node is inserted in front of the node that
//currently follows link->head, i.e. right after the head.
FRISO_API void link_list_add_first(
    friso_link_t link, void *value) {
    insert_before(link, link->head->next, value);
}

View File

@ -1,6 +1,6 @@
/*
* utf-8 handle functions implementation.
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
@ -21,32 +21,30 @@
*
* @date: 2014-10-16
*/
__STATIC_API__ fstring create_buffer(uint_t length) {
    //one byte more than requested, so a full buffer still ends in '\0'.
    uint_t bytes = length + 1;
    fstring zeroed = (fstring) FRISO_MALLOC(bytes);

    if(zeroed == NULL) {
        ___ALLOCATION_ERROR___
    }

    //hand out the memory with every byte cleared.
    memset(zeroed, 0x00, bytes);

    return zeroed;
}
//the __allocs should not be smaller than sb->length
__STATIC_API__ string_buffer_t resize_buffer(
string_buffer_t sb, uint_t __allocs )
{
__STATIC_API__ string_buffer_t resize_buffer(
string_buffer_t sb, uint_t __allocs) {
//create a new buffer.
//if ( __allocs < sb->length ) __allocs = sb->length + 1;
fstring str = create_buffer( __allocs );
//if ( __allocs < sb->length ) __allocs = sb->length + 1;
fstring str = create_buffer(__allocs);
//register uint_t t;
//for ( t = 0; t < sb->length; t++ ) {
// str[t] = sb->buffer[t];
//}
memcpy( str, sb->buffer, sb->length );
FRISO_FREE( sb->buffer );
memcpy(str, sb->buffer, sb->length);
FRISO_FREE(sb->buffer);
sb->buffer = str;
sb->allocs = __allocs;
@ -55,21 +53,20 @@ __STATIC_API__ string_buffer_t resize_buffer(
}
//create a new fstring buffer with a default opacity.
//FRISO_API string_buffer_t new_string_buffer( void )
//FRISO_API string_buffer_t new_string_buffer( void )
//{
// return new_string_buffer_with_opacity( __BUFFER_DEFAULT_LENGTH__ );
//}
//create a new fstring buffer with the given opacity.
FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity )
{
string_buffer_t sb = ( string_buffer_t )
FRISO_MALLOC( sizeof( string_buffer_entry ) );
if ( sb == NULL ) {
FRISO_API string_buffer_t new_string_buffer_with_opacity(uint_t opacity) {
string_buffer_t sb = (string_buffer_t)
FRISO_MALLOC(sizeof(string_buffer_entry));
if(sb == NULL) {
___ALLOCATION_ERROR___
}
}
sb->buffer = create_buffer( opacity );
sb->buffer = create_buffer(opacity);
sb->length = 0;
sb->allocs = opacity;
@ -77,18 +74,17 @@ FRISO_API string_buffer_t new_string_buffer_with_opacity( uint_t opacity )
}
//create a buffer with the given string.
FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
{
FRISO_API string_buffer_t new_string_buffer_with_string(fstring str) {
//buffer allocations.
string_buffer_t sb = ( string_buffer_t )
FRISO_MALLOC( sizeof( string_buffer_entry ) );
if ( sb == NULL ) {
string_buffer_t sb = (string_buffer_t)
FRISO_MALLOC(sizeof(string_buffer_entry));
if(sb == NULL) {
___ALLOCATION_ERROR___
}
//initialize
sb->length = strlen( str );
sb->buffer = create_buffer( sb->length + __BUFFER_DEFAULT_LENGTH__ );
sb->length = strlen(str);
sb->buffer = create_buffer(sb->length + __BUFFER_DEFAULT_LENGTH__);
sb->allocs = sb->length + __BUFFER_DEFAULT_LENGTH__;
//register uint_t t;
@ -96,19 +92,18 @@ FRISO_API string_buffer_t new_string_buffer_with_string( fstring str )
//for ( t = 0; t < sb->length; t++ ) {
// sb->buffer[t] = str[t];
//}
memcpy( sb->buffer, str, sb->length );
memcpy(sb->buffer, str, sb->length);
return sb;
}
FRISO_API void string_buffer_append(
string_buffer_t sb, fstring __str )
{
register uint_t __len__ = strlen( __str );
FRISO_API void string_buffer_append(
string_buffer_t sb, fstring __str) {
register uint_t __len__ = strlen(__str);
//check the necessity to resize the buffer.
if ( sb->length + __len__ > sb->allocs ) {
sb = resize_buffer( sb, ( sb->length + __len__ ) * 2 + 1 );
if(sb->length + __len__ > sb->allocs) {
sb = resize_buffer(sb, (sb->length + __len__) * 2 + 1);
}
//register uint_t t;
@ -116,26 +111,24 @@ FRISO_API void string_buffer_append(
//for ( t = 0; t < __len__; t++ ) {
// sb->buffer[ sb->length++ ] = __str[t];
//}
memcpy( sb->buffer + sb->length, __str, __len__ );
memcpy(sb->buffer + sb->length, __str, __len__);
sb->length += __len__;
}
//append the single char `ch` to the end of the buffer.
FRISO_API void string_buffer_append_char(
    string_buffer_t sb, char ch) {
    //grow first when one more byte would exceed the allocation.
    if(sb->allocs < sb->length + 1) {
        sb = resize_buffer(sb, 2 * sb->length + 1);
    }

    sb->buffer[sb->length] = ch;
    sb->length++;
}
//placeholder: the body is empty, so calling this leaves `sb` untouched.
//NOTE(review): presumably meant to insert `__str` at `idx` - not implemented.
FRISO_API void string_buffer_insert(
    string_buffer_t sb,
    uint_t idx,
    fstring __str) {
    //arguments are intentionally unused.
    (void) sb;
    (void) idx;
    (void) __str;
}
/*
@ -144,26 +137,25 @@ FRISO_API void string_buffer_insert(
*
* @return the new string.
*/
/*
 * remove up to `length` bytes starting at byte offset `idx`.
 *
 * the span is clamped to the stored bytes: an `idx` at or past the end
 * removes nothing and a span that runs over the end removes only what
 * is there. the bytes behind the span are shifted left, sb->length is
 * reduced by the number of bytes removed and the vacated tail is set to
 * 0x00, so the buffer stays zero-filled behind sb->length (the append
 * functions of this file write no terminator themselves).
 *
 * @param sb     buffer to modify in place
 * @param idx    byte offset of the first byte to remove
 * @param length number of bytes to remove
 * @return sb->buffer
 */
FRISO_API fstring string_buffer_remove(
    string_buffer_t sb,
    uint_t idx,
    uint_t length) {
    uint_t tail;

    //nothing to remove when the start lies at or beyond the end.
    if(idx >= sb->length) {
        return sb->buffer;
    }

    //clamp the span so it never runs past the stored bytes.
    if(length > sb->length - idx) {
        length = sb->length - idx;
    }

    //close the gap: move the bytes that follow the span to the left.
    //memmove, because source and destination overlap.
    tail = sb->length - idx - length;
    memmove(sb->buffer + idx, sb->buffer + idx + length, tail);
    sb->length -= length;

    //wipe the vacated bytes; this also terminates the string right
    //after the last byte that was kept.
    memset(sb->buffer + sb->length, 0x00, length);

    return sb->buffer;
}
@ -172,25 +164,23 @@ FRISO_API fstring string_buffer_remove(
* turn the string_buffer to a string.
* or return the buffer of the string_buffer.
*/
FRISO_API string_buffer_t string_buffer_trim(string_buffer_t sb) {
    //leave the buffer alone unless length is below allocs - 1.
    if(!(sb->length < sb->allocs - 1)) {
        return sb;
    }

    //shrink the allocation to length + 1.
    return resize_buffer(sb, sb->length + 1);
}
/*
 * release the string_buffer entry only and hand its character buffer
 * over to the caller. the character buffer is not freed here, so the
 * caller owns it afterwards and has to release it when it is done.
 */
FRISO_API fstring string_buffer_devote(string_buffer_t sb) {
    fstring handed_over = sb->buffer;

    FRISO_FREE(sb);

    return handed_over;
}
@ -198,17 +188,15 @@ FRISO_API fstring string_buffer_devote( string_buffer_t sb )
* clear the given fstring buffer.
* reset its buffer with 0 and reset its length to 0.
*/
FRISO_API void string_buffer_clear(string_buffer_t sb) {
    //wipe only the bytes in use, then mark the buffer as empty.
    //the allocation itself is kept.
    memset(sb->buffer, '\0', sb->length);
    sb->length = 0;
}
//release the character buffer first, then the string_buffer entry itself.
FRISO_API void free_string_buffer(string_buffer_t sb) {
    fstring chars = sb->buffer;

    FRISO_FREE(chars);
    FRISO_FREE(sb);
}
@ -216,15 +204,14 @@ FRISO_API void free_string_buffer( string_buffer_t sb )
* create a new string_split_entry.
*
* @param source
* @return string_split_t;
* @return string_split_t;
*/
FRISO_API string_split_t new_string_split(
fstring delimiter,
fstring source )
{
string_split_t e = ( string_split_t )
FRISO_MALLOC( sizeof( string_split_entry ) );
if ( e == NULL ) {
FRISO_API string_split_t new_string_split(
fstring delimiter,
fstring source) {
string_split_t e = (string_split_t)
FRISO_MALLOC(sizeof(string_split_entry));
if(e == NULL) {
___ALLOCATION_ERROR___;
}
@ -237,70 +224,65 @@ FRISO_API string_split_t new_string_split(
return e;
}
//re-arm the splitter with a new delimiter and a new source and restart
//at offset 0. both strings are referenced, not copied.
FRISO_API void string_split_reset(
    string_split_t sst,
    fstring delimiter,
    fstring source) {
    sst->idx = 0;

    sst->source = source;
    sst->srcLen = strlen(source);

    sst->delimiter = delimiter;
    sst->delLen = strlen(delimiter);
}
//point the splitter at a new source string and restart at offset 0.
//the string is referenced, not copied.
FRISO_API void string_split_set_source(
    string_split_t sst, fstring source) {
    sst->idx = 0;
    sst->srcLen = strlen(source);
    sst->source = source;
}

//swap in a new delimiter and restart at offset 0 of the source.
//the string is referenced, not copied.
FRISO_API void string_split_set_delimiter(
    string_split_t sst, fstring delimiter) {
    sst->idx = 0;
    sst->delLen = strlen(delimiter);
    sst->delimiter = delimiter;
}

//release the splitter entry itself; source and delimiter are not freed here.
FRISO_API void free_string_split(string_split_t sst) {
    FRISO_FREE(sst);
}
/**
* get the next split fstring, and copy the
* splited fstring into the __dst buffer .
* get the next split fstring, and copy the
* splited fstring into the __dst buffer .
*
* @param string_split_t
* @param __dst
* @return fstring (NULL if reach the end of the source
* @return fstring (NULL if reach the end of the source
* or there is no more segmentation)
*/
FRISO_API fstring string_split_next(
string_split_t sst, fstring __dst)
{
FRISO_API fstring string_split_next(
string_split_t sst, fstring __dst) {
uint_t i, _ok;
fstring _dst = __dst;
//check if reach the end of the fstring
if ( sst->idx >= sst->srcLen ) return NULL;
if(sst->idx >= sst->srcLen) return NULL;
while ( 1 ) {
while(1) {
_ok = 1;
for ( i = 0; i < sst->delLen
&& (sst->idx + i < sst->srcLen); i++ ) {
if ( sst->source[sst->idx+i] != sst->delimiter[i] ) {
for(i = 0; i < sst->delLen
&& (sst->idx + i < sst->srcLen); i++) {
if(sst->source[sst->idx + i] != sst->delimiter[i]) {
_ok = 0;
break;
}
}
}
//find the delimiter here,
//break the loop and self plus the sst->idx, then return the buffer .
if ( _ok == 1 ) {
//break the loop and self plus the sst->idx, then return the buffer .
if(_ok == 1) {
sst->idx += sst->delLen;
break;
}
@ -308,7 +290,7 @@ FRISO_API fstring string_split_next(
//copy the char to the buffer
*_dst++ = sst->source[sst->idx++];
//check if reach the end of the fstring
if ( sst->idx >= sst->srcLen ) break;
if(sst->idx >= sst->srcLen) break;
}
*_dst = '\0';

View File

@ -8,10 +8,10 @@
#include <stdio.h>
#include <stdlib.h>
int main( int argc, char **args ) {
int main(int argc, char **args) {
//create a new array list.
friso_array_t array = new_array_list();
friso_array_t array = new_array_list();
fstring keys[] = {
"chenmanwen", "yangqinghua",
"chenxin", "luojiangyan", "xiaoyanzi", "bibi",
@ -20,31 +20,31 @@ int main( int argc, char **args ) {
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
"caizaili", "panpan", "xiaolude", "yintanwen"
};
int j, idx = 2, len = sizeof( keys ) / sizeof( fstring );
int j, idx = 2, len = sizeof(keys) / sizeof(fstring);
for ( j = 0; j < len; j++ ) {
array_list_add( array, keys[j] );
for(j = 0; j < len; j++) {
array_list_add(array, keys[j]);
}
printf("length=%d, allocations=%d\n", array->length, array->allocs );
array_list_trim( array );
printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("length=%d, allocations=%d\n", array->length, array->allocs);
array_list_trim(array);
printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs);
printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
printf("\nAfter set %dth item.\n", idx );
array_list_set( array, idx, "chenxin__" );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nAfter set %dth item.\n", idx);
array_list_set(array, idx, "chenxin__");
printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
printf("\nAfter remove %dth item.\n", idx );
array_list_remove( array, idx );
printf("length=%d, allocations=%d\n", array->length, array->allocs );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nAfter remove %dth item.\n", idx);
array_list_remove(array, idx);
printf("length=%d, allocations=%d\n", array->length, array->allocs);
printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
printf("\nInsert a item at %dth\n", idx );
array_list_insert( array, idx, "*chenxin*" );
printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );
printf("\nInsert a item at %dth\n", idx);
array_list_insert(array, idx, "*chenxin*");
printf("idx=%d, value=%s\n", idx, (fstring) array_list_get(array, idx));
free_array_list( array );
free_array_list(array);
return 0;
}

View File

@ -32,19 +32,18 @@ break;
println("+---------------------------------------------------------------+");
//read one line from `fp` into `__dst`; the '\n' itself is not stored and
//the result is always '\0' terminated.
//returns NULL only when EOF is hit before any char was read, `__dst` otherwise.
//NOTE(review): the write is not bounded, `__dst` has to be large enough
//for the longest line - confirm against the callers.
static fstring getLine(FILE *fp, fstring __dst) {
    fstring out = __dst;
    int ch;

    for(ch = getc(fp); ch != EOF && ch != '\n'; ch = getc(fp)) {
        *out++ = ch;
    }
    *out = '\0';

    if(ch == EOF && out == __dst) {
        return NULL;
    }

    return __dst;
}
/*static void printcode( fstring str ) {
@ -57,8 +56,7 @@ static fstring getLine( FILE *fp, fstring __dst )
putchar('\n');
}*/
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
clock_t s_time, e_time;
char line[__INPUT_LENGTH__] = {0};
@ -70,13 +68,13 @@ int main(int argc, char **argv)
friso_task_t task;
// get the lexicon directory from command line arguments
for ( i = 0; i < argc; i++ ) {
if ( strcasecmp( "-init", argv[i] ) == 0 ) {
__path__ = argv[i+1];
for(i = 0; i < argc; i++) {
if(strcasecmp("-init", argv[i]) == 0) {
__path__ = argv[i + 1];
}
}
if ( __path__ == NULL ) {
if(__path__ == NULL) {
println("Usage: friso -init lexicon path");
exit(0);
}
@ -90,12 +88,12 @@ int main(int argc, char **argv)
friso_dic_load_from_ifile( dic, __path__, __LENGTH__ );
friso_set_dic( friso, dic );
friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/
if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
if(friso_init_from_ifile(friso, config, __path__) != 1) {
printf("fail to initialize friso and config.\n");
goto err;
}
switch ( config->mode ) {
switch(config->mode) {
case __FRISO_SIMPLE_MODE__:
mode = "Simple";
break;
@ -114,41 +112,41 @@ int main(int argc, char **argv)
e_time = clock();
printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
printf("Initialized in %fsec\n", (double)(e_time - s_time) / CLOCKS_PER_SEC);
printf("Mode: %s\n", mode);
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK");
___ABOUT___;
//set the task.
task = friso_new_task();
while ( 1 ) {
while(1) {
print("friso>> ");
getLine( stdin, line );
getLine(stdin, line);
//exit the program
if (strcasecmp( line, "quit") == 0) {
if(strcasecmp(line, "quit") == 0) {
___EXIT_INFO___
}
//for ( i = 0; i < 1000000; i++ ) {
//set the task text.
friso_set_text( task, line );
friso_set_text(task, line);
println("分词结果:");
s_time = clock();
while ( ( config->next_token( friso, config, task ) ) != NULL ) {
while((config->next_token(friso, config, task)) != NULL) {
printf(
"%s[%d, %d, %d] ",
task->token->word,
task->token->offset,
task->token->length,
"%s[%d, %d, %d] ",
task->token->word,
task->token->offset,
task->token->length,
task->token->rlen
);
// printf("%s ", task->token->word);
}
//}
e_time = clock();
printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );
printf("\nDone, cost < %fsec\n", ((double)(e_time - s_time)) / CLOCKS_PER_SEC);
}

View File

@ -1,19 +1,18 @@
/**
* hashmap testing program
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
#include "friso_API.h"
#include <stdio.h>
//print the length, size, factor and threshold fields of the hash table to stdout.
void print_hash_info(friso_hash_t _hash) {
    printf(
        "info:length=%d, size=%d, facotr=%f, threshold=%d\n",
        _hash->length,
        _hash->size,
        _hash->factor,
        _hash->threshold
    );
}
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
friso_hash_t _hash = new_hash_table();
char *names[] = {
"陈满文", "阳清华",
@ -31,13 +30,13 @@ int main(int argc, char **argv)
"周安", "郭桥安",
"刘敏", "黄广华",
"李胜", "黄海清"
};
};
//char *str[] = {"陈鑫", "张仁芳", "比比"};
char **str = names;
int j, len = 30;
print_hash_info( _hash );
for (j = 0; j < len; j++) {
print_hash_info(_hash);
for(j = 0; j < len; j++) {
hash_put_mapping(_hash, names[j], names[j]);
}
@ -47,7 +46,7 @@ int main(int argc, char **argv)
getchar();
//remove mappings
for (j = 0; j < len; j++) {
for(j = 0; j < len; j++) {
printf("Exist %s?%2d\n", str[j], hash_exist_mapping(_hash, str[j]));
printf("Now, remove %s\n", str[j]);
hash_remove_mapping(_hash, str[j]);

View File

@ -1,6 +1,6 @@
/*
* lex functions test program.
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
@ -16,8 +16,7 @@
printf("3. other search the words in the dictionary.\n"); \
printf("4. quit exit the programe.\n");
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
lex_entry_t e;
int lex = __LEX_CJK_WORDS__;
char _line[__LENGTH__];
@ -59,42 +58,42 @@ int main(int argc, char **argv)
//__CN_DNAME2__
friso_dic_load(friso, config, __LEX_CN_DNAME2__, "../vendors/dict/UTF-8/lex-dname-2.lex", __LENGTH__);
//__CN_LNA__
friso_dic_load(friso, config, __LEX_CN_LNA__, "../vendors/dict/UTF-8/lex-ln-adorn.lex", __LENGTH__ );
friso_dic_load(friso, config, __LEX_CN_LNA__, "../vendors/dict/UTF-8/lex-ln-adorn.lex", __LENGTH__);
e_time = clock();
printf(
"Done, cost: %f sec, size=%d\n",
(double) (e_time - s_time) / CLOCKS_PER_SEC,
"Done, cost: %f sec, size=%d\n",
(double)(e_time - s_time) / CLOCKS_PER_SEC,
friso_all_dic_size(friso->dic)
);
while (1) {
while(1) {
printf("friso-%d>> ", lex);
if (scanf("%s", _line) != 1) {
if(scanf("%s", _line) != 1) {
printf("Invalid input\n");
continue;
}
if (strcmp( _line, "quit" ) == 0) {
if(strcmp(_line, "quit") == 0) {
break;
} else if ( strcmp(_line, "help") == 0 ) {
} else if(strcmp(_line, "help") == 0) {
___PRINT_HELP_INFO___
} else if ( strcmp( _line, "#set" ) == 0 ) {
} else if(strcmp(_line, "#set") == 0) {
printf("lex_t>> ");
if (scanf("%d", &lex) != 1) {
if(scanf("%d", &lex) != 1) {
printf("Warning: Invalid lex type input\n");
continue;
}
} else {
s_time = clock();
e = friso_dic_get( friso->dic, lex, _line );
e = friso_dic_get(friso->dic, lex, _line);
e_time = clock();
if (e != NULL) {
if(e != NULL) {
printf(
"word=%s, syn=%s, fre=%d, cost:%fsec\n",
e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0],
e->fre,
(double) (e_time - s_time) / CLOCKS_PER_SEC
"word=%s, syn=%s, fre=%d, cost:%fsec\n",
e->word, e->syn == NULL ? "NULL" : (char *)e->syn->items[0],
e->fre,
(double)(e_time - s_time) / CLOCKS_PER_SEC
);
} else {
printf("%s was not found.\n", _line);

View File

@ -1,6 +1,6 @@
/*
* link list test program.
*
*
* @author lionsoul<chenxin619315@gmail.com>
*/
@ -8,7 +8,7 @@
#include <stdio.h>
#include <stdlib.h>
int main( int argc, char **args ) {
int main(int argc, char **args) {
friso_link_t link;
fstring keys[] = {
@ -19,32 +19,32 @@ int main( int argc, char **args ) {
"chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
"caizaili", "panpan", "xiaolude", "yintanwen"
};
int j, len = sizeof( keys ) / sizeof( fstring );
int j, len = sizeof(keys) / sizeof(fstring);
link = new_link_list();
//print the size of the link
printf("size=%d\n", link->size );
printf("size=%d\n", link->size);
for ( j = 0; j < len; j++ ) {
for(j = 0; j < len; j++) {
//link_add( link, keys[j] );
link_list_add_last( link, keys[j] );
link_list_add_last(link, keys[j]);
}
printf("size=%d\n", link->size );
printf("size=%d\n", link->size);
for ( j = 0; j < len / 2; j++ ) {
for(j = 0; j < len / 2; j++) {
//printf("idx=%d, remove %s\n", j, ( fstring ) link_remove( link, 0 ) );
printf("idx=%d, remove %s\n", j, ( fstring ) link_list_remove_first( link ) );
printf("idx=%d, remove %s\n", j, (fstring) link_list_remove_first(link));
}
printf("size=%d\n", link->size );
printf("size=%d\n", link->size);
//clear all the nodes
link_list_clear( link );
printf("size=%d, head->next->value=%s\n", link->size, ( fstring ) link->head->next->value );
link_list_clear(link);
printf("size=%d, head->next->value=%s\n", link->size, (fstring) link->head->next->value);
free_link_list( link );
free_link_list(link);
return 0;
}

View File

@ -8,17 +8,16 @@
#include <stdio.h>
#include <stdlib.h>
int main ( int argc, char **args )
{
fstring source = ",I am a chinese,,my name is Lion,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057";
int main(int argc, char **args) {
fstring source = ",I am a chinese,,my name is Lion,and i am the author of friso,bug report email chenxin619315@gmail.com,qq:1187582057";
char buffer[128];
string_split_t split = new_string_split(",", source );
string_split_t split = new_string_split(",", source);
printf("sst->idx=%d\n", split->idx);
printf("sst->srcLen=%d\n", split->srcLen);
printf("sst->delLen=%d\n", split->delLen);
while ( string_split_next(split, buffer) != NULL) {
while(string_split_next(split, buffer) != NULL) {
printf("buffer:%s\n", buffer);
}

View File

@ -9,39 +9,39 @@
#include <stdlib.h>
#include <string.h>
int main( int argc, char **args ) {
int main(int argc, char **args) {
fstring str = "康熙字典部首, 符号和标点, 统一表意符号扩展 A ,CJK㈩兼Ⅱ容形式⑩.";
char word[4];
int bytes, t, j, length = strlen( str );
int bytes, t, j, length = strlen(str);
string_buffer_t sb = new_string_buffer();
printf("str=%s, length=%d\n", str, length );
printf("str=%s, length=%d\n", str, length);
for (t = 0; t < length; t += bytes) {
for(t = 0; t < length; t += bytes) {
bytes = get_utf8_bytes(*(str + t));
if ( bytes == 0 ) {
if(bytes == 0) {
continue;
}
for ( j = 0; j < bytes; j++ ) {
word[j] = *(str + t + j );
for(j = 0; j < bytes; j++) {
word[j] = *(str + t + j);
}
word[j] = '\0';
string_buffer_append( sb, word );
printf("word=%s\n", word );
string_buffer_append(sb, word);
printf("word=%s\n", word);
}
printf("length=%d, buffer=%s\n", sb->length, sb->buffer );
string_buffer_remove( sb, 0, 3 );
printf("length=%d, buffer=%s\n", sb->length, sb->buffer );
string_buffer_remove( sb, 0, 3 );
printf("length=%d, buffer=%s\n", sb->length, sb->buffer );
string_buffer_remove( sb, sb->length - 3, 6 );
sb = string_buffer_trim( sb );
printf("length=%d, buffer=%s\n", sb->length, string_buffer_devote( sb ) );
printf("length=%d, buffer=%s\n", sb->length, sb->buffer);
string_buffer_remove(sb, 0, 3);
printf("length=%d, buffer=%s\n", sb->length, sb->buffer);
string_buffer_remove(sb, 0, 3);
printf("length=%d, buffer=%s\n", sb->length, sb->buffer);
string_buffer_remove(sb, sb->length - 3, 6);
sb = string_buffer_trim(sb);
printf("length=%d, buffer=%s\n", sb->length, string_buffer_devote(sb));
//00011110 - yuan ma
//11100001 - fa ma

View File

@ -20,12 +20,11 @@
#include "app-match.h"
#include <glib.h>
#include "file-utils.h"
using namespace Zeeker;
static AppMatch *app_match_Class = nullptr;
AppMatch *AppMatch::getAppMatch()
{
if (!app_match_Class) {
AppMatch *AppMatch::getAppMatch() {
if(!app_match_Class) {
app_match_Class = new AppMatch;
}
return app_match_Class;
@ -33,140 +32,130 @@ AppMatch *AppMatch::getAppMatch()
/**
 * @brief Constructs the application matcher.
 *
 * Creates a QFileSystemWatcher (parented to this object) on
 * /usr/share/applications/, registers the QMap<QString,QString> and
 * QList<QMap<QString,QString>> D-Bus meta types, and opens a session-bus
 * QDBusInterface to com.kylin.softwarecenter.getsearchresults. The revised
 * lines additionally watch ~/.local/share/applications/ when it exists.
 * An invalid interface is only logged; m_interFace is kept either way.
 *
 * NOTE(review): this span is a diff rendering - each changed statement
 * appears twice (old formatting first, then the reformatted line).
 *
 * @param parent QObject parent forwarded to QThread.
 */
AppMatch::AppMatch(QObject *parent) : QThread(parent)
// m_versionCommand(new QProcess(this))
{
m_watchAppDir=new QFileSystemWatcher(this);
m_watchAppDir = new QFileSystemWatcher(this);
m_watchAppDir->addPath("/usr/share/applications/");
qDBusRegisterMetaType<QMap<QString,QString>>();
qDBusRegisterMetaType<QList<QMap<QString,QString>>>();
m_interFace=new QDBusInterface ("com.kylin.softwarecenter.getsearchresults", "/com/kylin/softwarecenter/getsearchresults",
"com.kylin.getsearchresults",
QDBusConnection::sessionBus());
if (!m_interFace->isValid())
{
// Per-user application directory; only watched when it already exists at construction time.
QDir androidPath(QDir::homePath() + "/.local/share/applications/");
if(androidPath.exists()) {
m_watchAppDir->addPath(QDir::homePath() + "/.local/share/applications/");
}
qDBusRegisterMetaType<QMap<QString, QString>>();
qDBusRegisterMetaType<QList<QMap<QString, QString>>>();
m_interFace = new QDBusInterface("com.kylin.softwarecenter.getsearchresults", "/com/kylin/softwarecenter/getsearchresults",
"com.kylin.getsearchresults",
QDBusConnection::sessionBus());
if(!m_interFace->isValid()) {
// Report why the session bus could not provide the interface; construction continues.
qWarning() << qPrintable(QDBusConnection::sessionBus().lastError().message());
}
qDebug()<<"AppMatch is new";
}
qDebug() << "AppMatch is new";
}
/**
 * @brief Releases the D-Bus interface and the directory watcher.
 *
 * Each pointer is deleted when non-null and then reset to NULL.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 */
AppMatch::~AppMatch(){
if(m_interFace){
AppMatch::~AppMatch() {
if(m_interFace) {
delete m_interFace;
}
m_interFace=NULL;
if(m_watchAppDir){
m_interFace = NULL;
if(m_watchAppDir) {
delete m_watchAppDir;
}
m_watchAppDir=NULL;
m_watchAppDir = NULL;
}
/**
 * @brief Runs one search: installed applications first, then the software center.
 *
 * Stores the query in m_sourceText, fills @p installed via getAppName() and
 * @p softwarereturn via softWareCenterSearch().
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 * The old lines strip spaces from the query and return early when it is
 * empty; the revised lines have no such guard, so an empty query reaches
 * the matching code (QString::contains("") presumably matches every name -
 * confirm this is intended).
 *
 * @param input          Search text typed by the user.
 * @param installed      Output map receiving matching installed applications.
 * @param softwarereturn Output map receiving software-center results.
 */
void AppMatch::startMatchApp(QString input,QMap<NameString,QStringList> &installed,QMap<NameString,QStringList> &softwarereturn){
input.replace(" ","");
if(input.isEmpty())
return;
m_sourceText=input;
void AppMatch::startMatchApp(QString input, QMap<NameString, QStringList> &installed, QMap<NameString, QStringList> &softwarereturn) {
m_sourceText = input;
getAppName(installed);
softWareCenterSearch(softwarereturn);
qDebug()<<"match app is successful!";
qDebug() << "match app is successful!";
}
/**
* @brief AppMatch::getAllDesktopFilePath Scans a directory for .desktop files
* @param path Directory containing .desktop files
*/
void AppMatch::getAllDesktopFilePath(QString path){
void AppMatch::getAllDesktopFilePath(QString path) {
char* name;
char* icon;
QStringList applist;
GKeyFileFlags flags=G_KEY_FILE_NONE;
GKeyFile* keyfile=g_key_file_new ();
GKeyFileFlags flags = G_KEY_FILE_NONE;
GKeyFile* keyfile = g_key_file_new();
QDir dir(path);
if (!dir.exists()) {
if(!dir.exists()) {
return;
}
dir.setFilter(QDir::Dirs|QDir::Files|QDir::NoDotAndDotDot);
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
QFileInfoList list = dir.entryInfoList();
list.removeAll(QFileInfo("/usr/share/applications/screensavers"));
if(list.size()< 1 ) {
if(list.size() < 1) {
return;
}
int i=0;
int i = 0;
//递归算法的核心部分
do{
do {
QFileInfo fileInfo = list.at(i);
//如果是文件夹,递归
bool isDir = fileInfo.isDir();
if(isDir) {
getAllDesktopFilePath(fileInfo.filePath());
}
else{
} else {
//过滤LXQt、KDE
QString filePathStr=fileInfo.filePath();
if(filePathStr.contains("KDE",Qt::CaseInsensitive)||
QString filePathStr = fileInfo.filePath();
// if(filePathStr.contains("KDE", Qt::CaseInsensitive) ||
// filePathStr.contains("mate",Qt::CaseInsensitive)||
filePathStr.contains("LX",Qt::CaseInsensitive) ){
i++;
continue;
}
// filePathStr.contains("LX", Qt::CaseInsensitive)) {
// i++;
// continue;
// }
//过滤后缀不是.desktop的文件
if(!filePathStr.endsWith(".desktop"))
{
if(!filePathStr.endsWith(".desktop")) {
i++;
continue;
}
QByteArray fpbyte=filePathStr.toLocal8Bit();
char* filepath=fpbyte.data();
g_key_file_load_from_file(keyfile,filepath,flags,nullptr);
char* ret_1=g_key_file_get_locale_string(keyfile,"Desktop Entry","NoDisplay", nullptr, nullptr);
if(ret_1!=nullptr)
{
QString str=QString::fromLocal8Bit(ret_1);
if(str.contains("true"))
{
QByteArray fpbyte = filePathStr.toLocal8Bit();
char* filepath = fpbyte.data();
g_key_file_load_from_file(keyfile, filepath, flags, nullptr);
char* ret_1 = g_key_file_get_locale_string(keyfile, "Desktop Entry", "NoDisplay", nullptr, nullptr);
if(ret_1 != nullptr) {
QString str = QString::fromLocal8Bit(ret_1);
if(str.contains("true")) {
i++;
continue;
}
}
char* ret_2=g_key_file_get_locale_string(keyfile,"Desktop Entry","NotShowIn", nullptr, nullptr);
if(ret_2!=nullptr)
{
QString str=QString::fromLocal8Bit(ret_2);
if(str.contains("UKUI"))
{
char* ret_2 = g_key_file_get_locale_string(keyfile, "Desktop Entry", "NotShowIn", nullptr, nullptr);
if(ret_2 != nullptr) {
QString str = QString::fromLocal8Bit(ret_2);
if(str.contains("UKUI")) {
i++;
continue;
}
}
//过滤中英文名为空的情况
QLocale cn;
QString language=cn.languageToString(cn.language());
if(QString::compare(language,"Chinese")==0)
{
char* nameCh=g_key_file_get_string(keyfile,"Desktop Entry","Name[zh_CN]", nullptr);
char* nameEn=g_key_file_get_string(keyfile,"Desktop Entry","Name", nullptr);
if(QString::fromLocal8Bit(nameCh).isEmpty() && QString::fromLocal8Bit(nameEn).isEmpty())
{
QString language = cn.languageToString(cn.language());
if(QString::compare(language, "Chinese") == 0) {
char* nameCh = g_key_file_get_string(keyfile, "Desktop Entry", "Name[zh_CN]", nullptr);
char* nameEn = g_key_file_get_string(keyfile, "Desktop Entry", "Name", nullptr);
if(QString::fromLocal8Bit(nameCh).isEmpty() && QString::fromLocal8Bit(nameEn).isEmpty()) {
i++;
continue;
}
} else {
char* name = g_key_file_get_string(keyfile, "Desktop Entry", "Name", nullptr);
if(QString::fromLocal8Bit(name).isEmpty()) {
i++;
continue;
}
}
else {
char* name=g_key_file_get_string(keyfile,"Desktop Entry","Name", nullptr);
if(QString::fromLocal8Bit(name).isEmpty())
{
i++;
continue;
}
}
name=g_key_file_get_locale_string(keyfile,"Desktop Entry","Name", nullptr, nullptr);
icon=g_key_file_get_locale_string(keyfile,"Desktop Entry","Icon", nullptr, nullptr);
if(!m_filePathList.contains(filePathStr)){
name = g_key_file_get_locale_string(keyfile, "Desktop Entry", "Name", nullptr, nullptr);
icon = g_key_file_get_locale_string(keyfile, "Desktop Entry", "Icon", nullptr, nullptr);
if(!m_filePathList.contains(filePathStr)) {
NameString appname;
appname.app_name = QString::fromLocal8Bit(name);
m_installAppMap.insert(appname,applist<<filePathStr<<QString::fromLocal8Bit(icon)<<""<<"");
applist.clear();
m_installAppMap.insert(appname, applist << filePathStr << QString::fromLocal8Bit(icon) << "" << "");
applist.clear();
}
// m_filePathList.append(filePathStr);
}
@ -180,8 +169,7 @@ void AppMatch::getAllDesktopFilePath(QString path){
* @brief AppMatch::getDesktopFilePath
*
*/
void AppMatch::getDesktopFilePath()
{
void AppMatch::getDesktopFilePath() {
m_filePathList.clear();
m_installAppMap.clear();
m_filePathList.append("/usr/share/applications/software-properties-livepatch.desktop");
@ -238,13 +226,12 @@ void AppMatch::getDesktopFilePath()
// }
}
/**
 * @brief Matches every known installed application against the current query.
 *
 * Iterates m_installAppMap and calls appNameMatch() with each key's
 * app_name; matches are accumulated in @p installed.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 *
 * @param installed Output map receiving matching installed applications.
 */
void AppMatch::getAppName(QMap<NameString,QStringList> &installed)
{
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
QMap<NameString, QStringList>::const_iterator i;
for(i=m_installAppMap.constBegin();i!=m_installAppMap.constEnd();++i){
appNameMatch(i.key().app_name,installed);
}
qDebug()<<"installed app match is successful!";
for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
appNameMatch(i.key().app_name, installed);
}
qDebug() << "installed app match is successful!";
}
/**
@ -253,67 +240,63 @@ void AppMatch::getAppName(QMap<NameString,QStringList> &installed)
* @param appname
*
*/
/**
 * @brief Tests one application name against m_sourceText and records a hit.
 *
 * The name is matched case-insensitively, in this order: the display name
 * itself, then for each pinyin variant returned by
 * FileUtils::findMultiToneWords() the initials string (odd index) and the
 * full-pinyin string (even index). On the first hit the application's value
 * list from m_installAppMap is inserted into @p installed and the function
 * returns.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 * NOTE(review): the value list is found by a linear walk over
 * m_installAppMap comparing app_name, once per call.
 *
 * @param appname   Display name of the application being tested.
 * @param installed Output map receiving the application when it matches.
 */
void AppMatch::appNameMatch(QString appname,QMap<NameString,QStringList> &installed){
void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &installed) {
NameString name{appname};
QStringList list;
QStringList pinyinlist;
pinyinlist=FileUtils::findMultiToneWords(appname);
QMapIterator<NameString,QStringList> iter(m_installAppMap);
while(iter.hasNext())
{
pinyinlist = FileUtils::findMultiToneWords(appname);
QMapIterator<NameString, QStringList> iter(m_installAppMap);
while(iter.hasNext()) {
iter.next();
if (iter.key().app_name == appname) {
if(iter.key().app_name == appname) {
list = iter.value();
break;
}
}
// 1) Direct substring match on the display name.
if(appname.contains(m_sourceText,Qt::CaseInsensitive)){
if(appname.contains(m_sourceText, Qt::CaseInsensitive)) {
// installed.insert(name,m_installAppMap.value(name));
installed.insert(name,list);
installed.insert(name, list);
return;
}
// 2) Pinyin variants: entries come in pairs (full pinyin at 2*i, initials at 2*i+1).
for(int i = 0;i<pinyinlist.size()/2;i++){
QString shouzimu=pinyinlist.at(2*i+1);// pinyin initials of the Chinese name
if(shouzimu.contains(m_sourceText,Qt::CaseInsensitive)){
for(int i = 0; i < pinyinlist.size() / 2; i++) {
QString shouzimu = pinyinlist.at(2 * i + 1); // pinyin initials of the Chinese name
if(shouzimu.contains(m_sourceText, Qt::CaseInsensitive)) {
// installed.insert(name,m_installAppMap.value(name));
installed.insert(name,list);
installed.insert(name, list);
return;
}
// Queries shorter than two characters are never matched against full pinyin.
// NOTE(review): this returns from the whole function, so the remaining
// variants' initials are not tried either - confirm `continue` was not meant.
if(m_sourceText.size()<2)
if(m_sourceText.size() < 2)
return;
QString pinyin=pinyinlist.at(2*i);// full pinyin of the Chinese name
if(pinyin.contains(m_sourceText,Qt::CaseInsensitive)){
QString pinyin = pinyinlist.at(2 * i); // full pinyin of the Chinese name
if(pinyin.contains(m_sourceText, Qt::CaseInsensitive)) {
// installed.insert(name,m_installAppMap.value(name));
installed.insert(name,list);
installed.insert(name, list);
return;
}
}
}
/**
 * @brief Queries the software center for the current search text.
 *
 * Skips the query with a warning when m_interFace->timeout() is not -1;
 * otherwise delegates to slotDBusCallFinished(), which performs the call.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 * NOTE(review): QDBusAbstractInterface::timeout() looks like the configured
 * call timeout (-1 meaning "use the default"), not an indication that a call
 * timed out - confirm that this guard does what the warning text says.
 *
 * @param softwarereturn Output map receiving software-center results.
 */
void AppMatch::softWareCenterSearch(QMap<NameString,QStringList> &softwarereturn){
if(m_interFace->timeout()!=-1){
qWarning()<<"softWareCente Dbus is timeout !";
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
if(m_interFace->timeout() != -1) {
qWarning() << "softWareCente Dbus is timeout !";
return;
}
slotDBusCallFinished(softwarereturn);
qDebug()<<"softWareCenter match app is successful!";
qDebug() << "softWareCenter match app is successful!";
}
/**
 * @brief Calls the software center's get_search_result method and parses the reply.
 *
 * The call is made synchronously with m_sourceText as its argument. A valid
 * reply is handed to parseSoftWareCenterReturn(); an invalid one is logged
 * and @p softwarereturn is left untouched.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 *
 * @param softwarereturn Output map receiving software-center results.
 */
void AppMatch::slotDBusCallFinished(QMap<NameString,QStringList> &softwarereturn){
QDBusReply<QList<QMap<QString,QString>>> reply = m_interFace->call("get_search_result",m_sourceText); // Blocks until the remote method call completes.
void AppMatch::slotDBusCallFinished(QMap<NameString, QStringList> &softwarereturn) {
QDBusReply<QList<QMap<QString, QString>>> reply = m_interFace->call("get_search_result", m_sourceText); // Blocks until the remote method call completes.
// QDBusPendingReply<QList<QMap<QString,QString>>> reply = *call;
if (reply.isValid())
{
parseSoftWareCenterReturn(reply.value(),softwarereturn);
}
else
{
qWarning() << "value method called failed!";
}
if(reply.isValid()) {
parseSoftWareCenterReturn(reply.value(), softwarereturn);
} else {
qWarning() << "value method called failed!";
}
// call->deleteLater();
}
void AppMatch::parseSoftWareCenterReturn(QList<QMap<QString,QString>> list,QMap<NameString,QStringList> &softwarereturn){
void AppMatch::parseSoftWareCenterReturn(QList<QMap<QString, QString>> list, QMap<NameString, QStringList> &softwarereturn) {
// qWarning()<<list;
QString appname;
NameString name;
@ -322,24 +305,24 @@ void AppMatch::parseSoftWareCenterReturn(QList<QMap<QString,QString>> list,QMap<
QStringList applist;
QLocale locale;
QString pkgname;
for(int i=0;i<list.size();i++){
for(int i = 0; i < list.size(); i++) {
// qWarning()<<list.at(i).keys();
if(locale.language()==QLocale::Chinese){
appname=list.at(i).value("displayname_cn");
pkgname = list.at(i).value("appname");
if(locale.language() == QLocale::Chinese) {
appname = list.at(i).value("displayname_cn");
pkgname = list.at(i).value("appname");
}
if(locale.language()==QLocale::English){
appname=list.at(i).value("appname");
if(locale.language() == QLocale::English) {
appname = list.at(i).value("appname");
}
appdiscription=list.at(i).value("discription");
appicon=list.at(i).value("icon");
appdiscription = list.at(i).value("discription");
appicon = list.at(i).value("icon");
name.app_name = appname;
pkgname.isEmpty() ? softwarereturn.insert(name,applist<<""<<appicon<<""<<appdiscription) : softwarereturn.insert(name,applist<<""<<appicon<<pkgname<<appdiscription);
pkgname.isEmpty() ? softwarereturn.insert(name, applist << "" << appicon << "" << appdiscription) : softwarereturn.insert(name, applist << "" << appicon << pkgname << appdiscription);
applist.clear();
}
}
void AppMatch::getInstalledAppsVersion(QString appname){
void AppMatch::getInstalledAppsVersion(QString appname) {
// qWarning()<<"apt show "+appname;
// m_versionCommand->start("apt show "+appname);
// m_versionCommand->startDetached(m_versionCommand->program());
@ -359,12 +342,22 @@ void AppMatch::getInstalledAppsVersion(QString appname){
// m_versionCommand->close();
}
/**
 * @brief Thread entry point: builds the installed-application map and hooks
 *        the directory watcher.
 *
 * Calls getDesktopFilePath() and scans /usr/share/applications/; the revised
 * lines also scan ~/.local/share/applications/ when it exists. A handler is
 * then connected to m_watchAppDir's directoryChanged signal.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved,
 * so both the old `[this]()` connect line and the revised
 * `[ = ](const QString & path)` connect line appear below.
 * NOTE(review): in the revised handler getDesktopFilePath() (which clears
 * m_installAppMap) runs on every notification, but only the directory named
 * by `path` is rescanned afterwards. Entries that came from the other
 * directory appear to be dropped until that directory changes as well.
 */
void AppMatch::run(){
qDebug()<<"AppMatch is run";
void AppMatch::run() {
qDebug() << "AppMatch is run";
this->getDesktopFilePath();
this->getAllDesktopFilePath("/usr/share/applications/");
connect(m_watchAppDir,&QFileSystemWatcher::directoryChanged,[this](){
// Per-user application directory; captured by copy in the revised lambda below.
QDir androidPath(QDir::homePath() + "/.local/share/applications/");
if(androidPath.exists())
this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/");
connect(m_watchAppDir, &QFileSystemWatcher::directoryChanged, this, [ = ](const QString & path) {
this->getDesktopFilePath();
this->getAllDesktopFilePath("/usr/share/applications/");
// Revised behaviour: rescan only the directory that reported the change.
if(path == "/usr/share/applications/") {
this->getAllDesktopFilePath("/usr/share/applications/");
}
if(androidPath.exists()) {
if(path == QDir::homePath() + "/.local/share/applications/") {
this->getAllDesktopFilePath(QDir::homePath() + "/.local/share/applications/");
}
}
});
}

View File

@ -29,9 +29,8 @@
#include <QtDBus>
#include <QElapsedTimer>
#include <QThread>
class NameString
{
namespace Zeeker {
class NameString {
public:
explicit NameString(const QString &str_) : app_name(str_) {}
NameString() = default;
@ -52,25 +51,24 @@ public:
// }
//};
class AppMatch : public QThread
{
class AppMatch : public QThread {
Q_OBJECT
public:
static AppMatch *getAppMatch();
void startMatchApp(QString input,QMap<NameString,QStringList> &installed,QMap<NameString,QStringList> &softwarereturn);
void startMatchApp(QString input, QMap<NameString, QStringList> &installed, QMap<NameString, QStringList> &softwarereturn);
private:
explicit AppMatch(QObject *parent = nullptr);
~AppMatch();
void getAllDesktopFilePath(QString path);
void getDesktopFilePath();
void getAppName(QMap<NameString,QStringList> &installed);
void getAppName(QMap<NameString, QStringList> &installed);
// void appNameMatch(QString appname,QString desktoppath,QString appicon);
void appNameMatch(QString appname,QMap<NameString,QStringList> &installed);
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
void softWareCenterSearch(QMap<NameString,QStringList> &softwarereturn);
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);
void parseSoftWareCenterReturn(QList<QMap<QString,QString>> list,QMap<NameString,QStringList> &softwarereturn);
void parseSoftWareCenterReturn(QList<QMap<QString, QString>> list, QMap<NameString, QStringList> &softwarereturn);
void getInstalledAppsVersion(QString appname);
@ -78,12 +76,12 @@ private:
QString m_sourceText;
QStringList m_filePathList;
QDBusInterface *m_interFace=nullptr;
QFileSystemWatcher *m_watchAppDir=nullptr;
QMap<NameString,QStringList> m_installAppMap;
QDBusInterface *m_interFace = nullptr;
QFileSystemWatcher *m_watchAppDir = nullptr;
QMap<NameString, QStringList> m_installAppMap;
private Q_SLOTS:
void slotDBusCallFinished(QMap<NameString,QStringList> &softwarereturn);
void slotDBusCallFinished(QMap<NameString, QStringList> &softwarereturn);
//Q_SIGNALS:
@ -91,5 +89,6 @@ protected:
void run() override;
};
}
#endif // APPMATCH_H

7
libsearch/common.h Normal file
View File

@ -0,0 +1,7 @@
#ifndef COMMON_H
#define COMMON_H
// Path of the ukui-search pipe under the user's home directory, as a
// local-8-bit C string.
// WARNING: the expansion calls constData() on a temporary QByteArray, so the
// pointer is only valid until the end of the full expression that uses the
// macro. Pass it straight to a function; do not store it in a const char*.
// This header does not include <QDir>; the including file must do so.
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
// TODO: move shared definitions that belong in this header here.
#endif // COMMON_H

View File

@ -20,31 +20,19 @@
*
*/
#include "file-utils.h"
#include <QDebug>
#include <QFile>
#include <QFileInfo>
#include <QUrl>
#include <QMap>
#include "quazip/quazip.h"
#include <quazip/quazipfile.h>
#include <QDomDocument>
#include <QQueue>
#include "uchardet/uchardet.h"
#include "poppler-qt5.h"
using namespace Zeeker;
size_t FileUtils::_max_index_count = 0;
size_t FileUtils::_current_index_count = 0;
unsigned short FileUtils::_index_status = 0;
FileUtils::SearchMethod FileUtils::searchMethod = FileUtils::SearchMethod::DIRECTSEARCH;
QMap<QString, QStringList> FileUtils::map_chinese2pinyin = QMap<QString, QStringList>();
// Default constructor; the body is empty.
// NOTE(review): diff rendering - the old and reformatted signature lines both appear.
FileUtils::FileUtils()
{
FileUtils::FileUtils() {
}
/**
 * @brief Derives a fixed-length term from a path.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 *
 * @param path Path to hash.
 * @return Lower-case hexadecimal MD5 digest of the UTF-8 encoded path.
 */
std::string FileUtils::makeDocUterm(QString path)
{
return QCryptographicHash::hash(path.toUtf8(),QCryptographicHash::Md5).toHex().toStdString();
std::string FileUtils::makeDocUterm(QString path) {
return QCryptographicHash::hash(path.toUtf8(), QCryptographicHash::Md5).toHex().toStdString();
}
/**
@ -53,30 +41,29 @@ std::string FileUtils::makeDocUterm(QString path)
* @param checkValid
* @return
*/
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid)
{
QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid) {
auto file = wrapGFile(g_file_new_for_uri(uri.toUtf8().constData()));
auto info = wrapGFileInfo(g_file_query_info(file.get()->get(),
G_FILE_ATTRIBUTE_STANDARD_ICON,
G_FILE_QUERY_INFO_NONE,
nullptr,
nullptr));
if (!G_IS_FILE_INFO (info.get()->get()))
G_FILE_ATTRIBUTE_STANDARD_ICON,
G_FILE_QUERY_INFO_NONE,
nullptr,
nullptr));
if(!G_IS_FILE_INFO(info.get()->get()))
return QIcon::fromTheme("unknown");
GIcon *g_icon = g_file_info_get_icon (info.get()->get());
GIcon *g_icon = g_file_info_get_icon(info.get()->get());
QString icon_name;
//do not unref the GIcon from info.
if (G_IS_ICON(g_icon)) {
const gchar* const* icon_names = g_themed_icon_get_names(G_THEMED_ICON (g_icon));
if (icon_names) {
if(G_IS_ICON(g_icon)) {
const gchar* const* icon_names = g_themed_icon_get_names(G_THEMED_ICON(g_icon));
if(icon_names) {
auto p = icon_names;
if (*p)
icon_name = QString (*p);
if (checkValid) {
while (*p) {
if(*p)
icon_name = QString(*p);
if(checkValid) {
while(*p) {
QIcon icon = QIcon::fromTheme(*p);
if (!icon.isNull()) {
icon_name = QString (*p);
if(!icon.isNull()) {
icon_name = QString(*p);
break;
} else {
p++;
@ -85,7 +72,7 @@ QIcon FileUtils::getFileIcon(const QString &uri, bool checkValid)
}
}
}
if (QIcon::fromTheme(icon_name).isNull()) {
if(QIcon::fromTheme(icon_name).isNull()) {
return QIcon::fromTheme("unknown");
}
return QIcon::fromTheme(icon_name);
@ -101,13 +88,13 @@ QIcon FileUtils::getAppIcon(const QString &path) {
ba = path.toUtf8();
GKeyFile * keyfile;
keyfile = g_key_file_new();
if (!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)){
g_key_file_free (keyfile);
if(!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)) {
g_key_file_free(keyfile);
return QIcon::fromTheme("unknown");
}
QString icon = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_ICON, NULL, NULL));
g_key_file_free(keyfile);
if (QIcon::fromTheme(icon).isNull()) {
if(QIcon::fromTheme(icon).isNull()) {
return QIcon(":/res/icons/desktop.png");
}
return QIcon::fromTheme(icon);
@ -121,17 +108,17 @@ QIcon FileUtils::getAppIcon(const QString &path) {
*/
QIcon FileUtils::getSettingIcon(const QString& setting, const bool& is_white) {
QString name = setting.left(setting.indexOf("/"));
if (! name.isEmpty()) {
if(! name.isEmpty()) {
name.replace(QString(name.at(0)), QString(name.at(0).toUpper()));
}
QString path;
if (is_white) {
if(is_white) {
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1White.svg").arg(name);
} else {
path = QString("/usr/share/ukui-control-center/shell/res/secondaryleftmenu/%1.svg").arg(name);
}
QFile file(path);
if (file.exists()) {
if(file.exists()) {
return QIcon(path);
} else {
return QIcon::fromTheme("ukui-control-center"); //无插件图标时,返回控制面板应用图标
@ -150,7 +137,7 @@ QIcon FileUtils::getSettingIcon(const QString& setting, const bool& is_white) {
*/
QString FileUtils::getFileName(const QString& uri) {
QFileInfo info(uri);
if (info.exists()) {
if(info.exists()) {
return info.fileName();
} else {
return "Unknown File";
@ -172,8 +159,8 @@ QString FileUtils::getAppName(const QString& path) {
ba = path.toUtf8();
GKeyFile * keyfile;
keyfile = g_key_file_new();
if (!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)){
g_key_file_free (keyfile);
if(!g_key_file_load_from_file(keyfile, ba.data(), G_KEY_FILE_NONE, NULL)) {
g_key_file_free(keyfile);
return "Unknown App";
}
QString name = QString(g_key_file_get_locale_string(keyfile, G_KEY_FILE_DESKTOP_GROUP, G_KEY_FILE_DESKTOP_KEY_NAME, NULL, NULL));
@ -191,10 +178,9 @@ QString FileUtils::getSettingName(const QString& setting) {
}
void FileUtils::loadHanziTable(const QString &fileName)
{
void FileUtils::loadHanziTable(const QString &fileName) {
QFile file(fileName);
if (!file.open(QFile::ReadOnly | QFile::Text)) {
if(!file.open(QFile::ReadOnly | QFile::Text)) {
qDebug("File: '%s' open failed!", file.fileName().toStdString().c_str());
return;
}
@ -210,25 +196,23 @@ void FileUtils::loadHanziTable(const QString &fileName)
return;
}
/**
 * @brief Looks up the MIME type of a file.
 *
 * Uses QMimeDatabase::mimeTypeForFile() in MatchContent mode.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 *
 * @param path Path of the file to inspect (not modified here).
 * @return The QMimeType reported by the MIME database.
 */
QMimeType FileUtils::getMimetype(QString &path)
{
QMimeType FileUtils::getMimetype(QString &path) {
QMimeDatabase mdb;
QMimeType type = mdb.mimeTypeForFile(path,QMimeDatabase::MatchContent);
QMimeType type = mdb.mimeTypeForFile(path, QMimeDatabase::MatchContent);
return type;
}
//aborted
QString FileUtils::find(const QString &hanzi)
{
QString FileUtils::find(const QString &hanzi) {
// static QMap<QString, QStringList> map = loadHanziTable("://index/pinyinWithoutTone.txt");
// static QMap<QString, QStringList> map;
QString output;
QStringList stringList = hanzi.split("");
/* 遍历查找汉字-拼音对照表的内容并将汉字替换为拼音 */
for (const QString &str : stringList) {
if (FileUtils::map_chinese2pinyin.contains(str))
for(const QString &str : stringList) {
if(FileUtils::map_chinese2pinyin.contains(str))
output += FileUtils::map_chinese2pinyin[str].first();
else
output += str;
@ -238,93 +222,88 @@ QString FileUtils::find(const QString &hanzi)
}
// DFS variant: falls over when the text contains too many polyphonic characters.
/**
 * @brief Recursively enumerates every pinyin spelling of @p hanzi.
 *
 * Consumes one character per call. A character found in
 * FileUtils::map_chinese2pinyin branches once per reading, appending the
 * reading to the full-pinyin prefix and its first letter to the initials
 * prefix; any other character is appended unchanged to both. When the input
 * is exhausted the two accumulated strings are appended to @p resultList,
 * full pinyin first and initials second.
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 *
 * @param hanzi           Remaining text to convert.
 * @param resultAllPinYin Full-pinyin prefix built so far.
 * @param resultFirst     Initials prefix built so far.
 * @param resultList      Output list receiving (pinyin, initials) pairs.
 */
void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList){
if (hanzi.size() == 0){
void stitchMultiToneWordsDFS(const QString& hanzi, const QString& resultAllPinYin, const QString& resultFirst, QStringList& resultList) {
if(hanzi.size() == 0) {
resultList.append(resultAllPinYin);
resultList.append(resultFirst);
return;
}
if (FileUtils::map_chinese2pinyin.contains(hanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(hanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[hanzi.at(0)]) {
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + i, resultFirst + i.at(0), resultList);
}
}
else{
} else {
stitchMultiToneWordsDFS(hanzi.right(hanzi.size() - 1), resultAllPinYin + hanzi.at(0), resultFirst + hanzi.at(0), resultList);
}
}
// BFS + stack variant: too many polyphonic characters will overflow the stack.
/**
 * @brief Breadth-first enumeration of every full-pinyin spelling of @p hanzi.
 *
 * The queue is seeded with the readings of the first character (or the
 * character itself when it is not in FileUtils::map_chinese2pinyin). For each
 * following character every queued prefix is replaced by one extended prefix
 * per reading. The final queue contents are appended to @p resultList.
 * Unlike the DFS variant, only full-pinyin strings are produced (no initials).
 *
 * NOTE(review): diff rendering - old and reformatted lines are interleaved.
 * NOTE(review): tempHanzi.at(0) is evaluated before any emptiness check, so
 * callers presumably must pass a non-empty string - confirm.
 * NOTE(review): resultAllPinYin and resultFirst are declared but never used.
 *
 * @param hanzi      Text to convert.
 * @param resultList Output list receiving the spellings.
 */
void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList){
void stitchMultiToneWordsBFSStack(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString> tempQueue;
tempHanzi = hanzi;
int tempQueueSize = 0;
// Seed the queue from the first character.
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(i);
}
}
else{
} else {
tempQueue.enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
// Extend every queued prefix with each reading of the next character.
while (tempHanzi.size() != 0) {
while(tempHanzi.size() != 0) {
// Snapshot the size: only prefixes queued before this round are expanded.
tempQueueSize = tempQueue.size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(tempQueue.head() + i);
}
tempQueue.dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
tempQueue.dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
// Drain the completed spellings into the caller's list.
while(!tempQueue.empty()){
while(!tempQueue.empty()) {
resultList.append(tempQueue.dequeue());
}
}
//BFS+Heap多音字太多会耗尽内存
void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList){
void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(i);
}
}
else{
} else {
tempQueue->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(tempQueue->head() + i);
}
tempQueue->dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueue->dequeue();
}
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue->empty()){
while(!tempQueue->empty()) {
resultList.append(tempQueue->dequeue());
}
delete tempQueue;
@ -332,28 +311,27 @@ void stitchMultiToneWordsBFSHeap(const QString& hanzi, QStringList& resultList){
}
//BFS+Heap+超过3个多音字只建一个索引比较折中的方案
void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList){
void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString>* tempQueue = new QQueue<QString>;
QQueue<QString>* tempQueueFirst = new QQueue<QString>;
tempHanzi = hanzi;
int tempQueueSize = 0;
int multiToneWordNum = 0;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
if (FileUtils::map_chinese2pinyin[i].size() > 1){
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
++multiToneWordNum;
}
}
}
if (multiToneWordNum > 3){
if(multiToneWordNum > 3) {
QString oneResult, oneResultFirst;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
oneResult += FileUtils::map_chinese2pinyin[i].first();
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
}
else{
} else {
oneResult += i;
oneResultFirst += i;
}
@ -363,31 +341,29 @@ void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultL
return;
}
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(i);
tempQueueFirst->enqueue(i.at(0));
}
}
else{
} else {
tempQueue->enqueue(tempHanzi.at(0));
tempQueueFirst->enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue->size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue->enqueue(tempQueue->head() + i);
tempQueueFirst->enqueue(tempQueueFirst->head() + i.at(0));
}
tempQueue->dequeue();
tempQueueFirst->dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue->enqueue(tempQueue->head() + tempHanzi.at(0));
tempQueueFirst->enqueue(tempQueueFirst->head() + tempHanzi.at(0));
tempQueue->dequeue();
@ -396,7 +372,7 @@ void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultL
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue->empty()){
while(!tempQueue->empty()) {
resultList.append(tempQueue->dequeue());
resultList.append(tempQueueFirst->dequeue());
}
@ -408,28 +384,27 @@ void stitchMultiToneWordsBFSHeapLess3(const QString& hanzi, QStringList& resultL
}
//BFS+Stack+超过3个多音字只建一个索引比较折中的方案
void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& resultList){
void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& resultList) {
QString tempHanzi, resultAllPinYin, resultFirst;
QQueue<QString> tempQueue;
QQueue<QString> tempQueueFirst;
tempHanzi = hanzi;
int tempQueueSize = 0;
int multiToneWordNum = 0;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
if (FileUtils::map_chinese2pinyin[i].size() > 1){
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
if(FileUtils::map_chinese2pinyin[i].size() > 1) {
++multiToneWordNum;
}
}
}
if (multiToneWordNum > 3){
if(multiToneWordNum > 3) {
QString oneResult, oneResultFirst;
for (auto i : hanzi){
if (FileUtils::map_chinese2pinyin.contains(i)){
for(auto i : hanzi) {
if(FileUtils::map_chinese2pinyin.contains(i)) {
oneResult += FileUtils::map_chinese2pinyin[i].first();
oneResultFirst += FileUtils::map_chinese2pinyin[i].first().at(0);
}
else{
} else {
oneResult += i;
oneResultFirst += i;
}
@ -439,31 +414,29 @@ void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& result
return;
}
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(i);
tempQueueFirst.enqueue(i.at(0));
}
}
else{
} else {
tempQueue.enqueue(tempHanzi.at(0));
tempQueueFirst.enqueue(tempHanzi.at(0));
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while (tempHanzi.size() != 0) {
while(tempHanzi.size() != 0) {
tempQueueSize = tempQueue.size();
if (FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))){
for (int j = 0; j < tempQueueSize; ++j){
for (auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]){
if(FileUtils::map_chinese2pinyin.contains(tempHanzi.at(0))) {
for(int j = 0; j < tempQueueSize; ++j) {
for(auto i : FileUtils::map_chinese2pinyin[tempHanzi.at(0)]) {
tempQueue.enqueue(tempQueue.head() + i);
tempQueueFirst.enqueue(tempQueueFirst.head() + i.at(0));
}
tempQueue.dequeue();
tempQueueFirst.dequeue();
}
}
else{
for (int j = 0; j < tempQueueSize; ++j){
} else {
for(int j = 0; j < tempQueueSize; ++j) {
tempQueue.enqueue(tempQueue.head() + tempHanzi.at(0));
tempQueueFirst.enqueue(tempQueueFirst.head() + tempHanzi.at(0));
tempQueue.dequeue();
@ -472,7 +445,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& result
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
}
while(!tempQueue.empty()){
while(!tempQueue.empty()) {
resultList.append(tempQueue.dequeue());
resultList.append(tempQueueFirst.dequeue());
}
@ -483,8 +456,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString& hanzi, QStringList& result
return;
}
QStringList FileUtils::findMultiToneWords(const QString& hanzi)
{
QStringList FileUtils::findMultiToneWords(const QString& hanzi) {
// QStringList* output = new QStringList();
QStringList output;
QString tempAllPinYin, tempFirst;
@ -501,17 +473,16 @@ QStringList FileUtils::findMultiToneWords(const QString& hanzi)
* @param path: abs path
* @return docx to QString
*/
void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
{
void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
//fix me :optimized by xpath??
QFileInfo info = QFileInfo(path);
if(!info.exists()||info.isDir())
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("word/document.xml",QuaZip::csSensitive))
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
@ -522,18 +493,14 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
fileR.close();
QDomElement first = doc.firstChildElement("w:document");
QDomElement body = first.firstChildElement("w:body");
while(!body.isNull())
{
QDomElement wp= body.firstChildElement("w:p");
while(!wp.isNull())
{
QDomElement wr= wp.firstChildElement("w:r");
while(!wr.isNull())
{
while(!body.isNull()) {
QDomElement wp = body.firstChildElement("w:p");
while(!wp.isNull()) {
QDomElement wr = wp.firstChildElement("w:r");
while(!wr.isNull()) {
QDomElement wt = wr.firstChildElement("w:t");
textcontent.append(wt.text().replace("\n",""));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{
textcontent.append(wt.text().replace("\n", ""));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
@ -547,20 +514,18 @@ void FileUtils::getDocxTextContent(QString &path,QString &textcontent)
return;
}
void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
{
void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists()||info.isDir())
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
QString prefix("ppt/slides/slide");
QStringList fileList;
for(QString i : file.getFileNameList())
{
for(QString i : file.getFileNameList()) {
if(i.startsWith(prefix))
fileList<<i;
fileList << i;
}
if(fileList.isEmpty())
return;
@ -572,11 +537,9 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
QDomDocument doc;
QDomElement at;
// QDomNodeList atList;
for(int i =0;i<fileList.size();++i)
{
QString name = prefix + QString::number(i+1) + ".xml";
if(!file.setCurrentFile(name))
{
for(int i = 0; i < fileList.size(); ++i) {
QString name = prefix + QString::number(i + 1) + ".xml";
if(!file.setCurrentFile(name)) {
continue;
}
QuaZipFile fileR(&file);
@ -605,24 +568,18 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
// }
//This is ugly but seems more efficient when handel a large file.
sptree = doc.firstChildElement("p:sld").firstChildElement("p:cSld").firstChildElement("p:spTree");
while(!sptree.isNull())
{
sp= sptree.firstChildElement("p:sp");
while(!sp.isNull())
{
txbody= sp.firstChildElement("p:txBody");
while(!txbody.isNull())
{
while(!sptree.isNull()) {
sp = sptree.firstChildElement("p:sp");
while(!sp.isNull()) {
txbody = sp.firstChildElement("p:txBody");
while(!txbody.isNull()) {
ap = txbody.firstChildElement("a:p");
while(!ap.isNull())
{
while(!ap.isNull()) {
ar = ap.firstChildElement("a:r");
while(!ar.isNull())
{
while(!ar.isNull()) {
at = ar.firstChildElement("a:t");
textcontent.append(at.text().replace("\r","")).replace("\t","");
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{
textcontent.append(at.text().replace("\r", "")).replace("\t", "");
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
@ -641,16 +598,15 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent)
return;
}
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
{
void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
QFileInfo info = QFileInfo(path);
if(!info.exists()||info.isDir())
if(!info.exists() || info.isDir())
return;
QuaZip file(path);
if(!file.open(QuaZip::mdUnzip))
return;
if(!file.setCurrentFile("xl/sharedStrings.xml",QuaZip::csSensitive))
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive))
return;
QuaZipFile fileR(&file);
@ -663,25 +619,19 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
QDomElement si;
QDomElement r;
QDomElement t;
while(!sst.isNull())
{
si= sst.firstChildElement("si");
while(!si.isNull())
{
r= si.firstChildElement("r");
if(r.isNull())
{
t= si.firstChildElement("t");
}
else
{
while(!sst.isNull()) {
si = sst.firstChildElement("si");
while(!si.isNull()) {
r = si.firstChildElement("r");
if(r.isNull()) {
t = si.firstChildElement("t");
} else {
t = r.firstChildElement("t");
}
if(t.isNull())
continue;
textcontent.append(t.text().replace("\r","").replace("\n",""));
if(textcontent.length() >= MAX_CONTENT_LENGTH/3)
{
textcontent.append(t.text().replace("\r", "").replace("\n", ""));
if(textcontent.length() >= MAX_CONTENT_LENGTH / 3) {
file.close();
return;
}
@ -693,46 +643,43 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent)
return;
}
/**
 * @brief Append the text of a PDF file to textcontent.
 *
 * Newlines are stripped from each page's text. Extraction stops once
 * textcontent holds at least MAX_CONTENT_LENGTH / 3 characters.
 * Nothing is appended when the file cannot be loaded or is locked.
 *
 * @param path: abs path of the PDF file
 * @param textcontent: receives the extracted text (appended to)
 */
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
    Poppler::Document *doc = Poppler::Document::load(path);
    // load() hands back a null pointer when the file cannot be opened or parsed.
    if(!doc)
        return;
    if(doc->isLocked()) {
        // The caller owns the document even when it is locked.
        delete doc;
        return;
    }
    // A null rectangle asks Poppler for the text of the whole page.
    const QRectF qf;
    int pageNum = doc->numPages();
    for(int i = 0; i < pageNum; ++i) {
        // page() returns a heap object owned by the caller; release it per page.
        Poppler::Page *page = doc->page(i);
        if(!page)
            continue;
        textcontent.append(page->text(qf).replace("\n", ""));
        delete page;
        if(textcontent.length() >= MAX_CONTENT_LENGTH / 3)
            break;
    }
    delete doc;
    return;
}
void FileUtils::getTxtContent(QString &path, QString &textcontent)
{
void FileUtils::getTxtContent(QString &path, QString &textcontent) {
QFile file(path);
if(!file.open(QIODevice::ReadOnly|QIODevice::Text))
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
return;
QByteArray encodedString = file.read(MAX_CONTENT_LENGTH);
uchardet_t chardet = uchardet_new();
if(uchardet_handle_data(chardet,encodedString.constData(),encodedString.size()) !=0)
qWarning()<<"Txt file encoding format detect fail!"<<path;
if(uchardet_handle_data(chardet, encodedString.constData(), encodedString.size()) != 0)
qWarning() << "Txt file encoding format detect fail!" << path;
uchardet_data_end(chardet);
const char *codec = uchardet_get_charset(chardet);
if(QTextCodec::codecForName(codec) == 0)
qWarning()<<"Unsupported Text encoding format"<<path<<QString::fromLocal8Bit(codec);
qWarning() << "Unsupported Text encoding format" << path << QString::fromLocal8Bit(codec);
QTextStream stream(encodedString,QIODevice::ReadOnly);
QTextStream stream(encodedString, QIODevice::ReadOnly);
stream.setCodec(codec);
uchardet_delete(chardet);
textcontent = stream.readAll().replace("\n","");
textcontent = stream.readAll().replace("\n", "");
file.close();
encodedString.clear();

View File

@ -21,13 +21,6 @@
*/
#ifndef FILEUTILS_H
#define FILEUTILS_H
#include "gobject-template.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <QString>
#include <QCryptographicHash>
#include <QIcon>
@ -35,20 +28,38 @@
#include <QMimeDatabase>
#include <QMimeType>
#include <QDir>
#include <QDebug>
#include <QFile>
#include <QFileInfo>
#include <QUrl>
#include <QMap>
#include <QDomDocument>
#include <QQueue>
#include <quazip/quazipfile.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <quazip/quazip.h>
#include <uchardet/uchardet.h>
//#include <poppler-qt5.h>
#include <poppler/qt5/poppler-qt5.h>
#include "libsearch_global.h"
#include "gobject-template.h"
//#define INITIAL_STATE 0
//#define CREATING_INDEX 1
//#define FINISH_CREATING_INDEX 2
#define MAX_CONTENT_LENGTH 20480000
#define UKUI_SEARCH_PIPE_PATH (QDir::homePath()+"/.config/org.ukui/ukui-search/ukuisearch").toLocal8Bit().constData()
class LIBSEARCH_EXPORT FileUtils
{
namespace Zeeker {
class LIBSEARCH_EXPORT FileUtils {
public:
static std::string makeDocUterm(QString );
static std::string makeDocUterm(QString);
static QIcon getFileIcon(const QString &, bool checkValid = true);
static QIcon getAppIcon(const QString &);
static QIcon getSettingIcon(const QString &, const bool&);
@ -74,8 +85,13 @@ public:
static size_t _current_index_count; //this one has been Abandoned,do not use it.
static unsigned short _index_status;
enum class SearchMethod { DIRECTSEARCH = 0, INDEXSEARCH = 1};
static SearchMethod searchMethod;
private:
FileUtils();
};
}
#endif // FILEUTILS_H

View File

@ -19,49 +19,54 @@
*
*/
#include <QtConcurrent>
#include <QApplication>
#include <QPalette>
#include "global-settings.h"
using namespace Zeeker;
static GlobalSettings *global_instance_of_global_settings = nullptr;
/**
 * @brief Return the shared GlobalSettings object, creating it on first use.
 *
 * The instance is allocated lazily and kept in the file-level pointer
 * global_instance_of_global_settings.
 */
GlobalSettings *GlobalSettings::getInstance() {
    if(global_instance_of_global_settings == nullptr)
        global_instance_of_global_settings = new GlobalSettings;
    return global_instance_of_global_settings;
}
GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent)
{
GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent) {
m_settings = new QSettings(MAIN_SETTINGS, QSettings::IniFormat, this);
// m_settings->setAtomicSyncRequired(false);
m_block_dirs_settings = new QSettings(BLOCK_DIRS,QSettings::IniFormat, this);
m_block_dirs_settings = new QSettings(BLOCK_DIRS, QSettings::IniFormat, this);
m_block_dirs_settings->setIniCodec(QTextCodec::codecForName("UTF-8"));
m_block_dirs_settings->setValue("These_are_block_dirs_conf_for_ukui_search","0");
m_block_dirs_settings->sync();
m_confWatcher = new QFileSystemWatcher(this);
m_confWatcher->addPath(BLOCK_DIRS);
connect(m_confWatcher, &QFileSystemWatcher::fileChanged, this, [ & ]() {
m_block_dirs_settings->sync();
m_confWatcher->addPath(BLOCK_DIRS);
});
m_search_record_settings = new QSettings(SEARCH_HISTORY, QSettings::IniFormat , this);
m_search_record_settings = new QSettings(SEARCH_HISTORY, QSettings::IniFormat, this);
m_search_record_settings->setIniCodec(QTextCodec::codecForName("UTF-8"));
for(QString i:m_search_record_settings->allKeys())
{
for(QString i : m_search_record_settings->allKeys()) {
m_history.append(QUrl::fromPercentEncoding(i.toLocal8Bit()));
}
if(!QDBusConnection::sessionBus().connect("org.kylinssoclient.dbus",
"/org/kylinssoclient/path",
"org.freedesktop.kylinssoclient.interface",
"keyChanged",
this, SLOT(updateSearchHistory(QString))))
"/org/kylinssoclient/path",
"org.freedesktop.kylinssoclient.interface",
"keyChanged",
this, SLOT(updateSearchHistory(QString))))
qWarning()<<"Kylinssoclient Dbus connect fail!";
qWarning() << "Kylinssoclient Dbus connect fail!";
this->forceSync();
//the default number of transparency in mainwindow is 0.7
//if someone changes the num in mainwindow, here should be modified too
m_cache.insert(TRANSPARENCY_KEY, 0.7);
if (QGSettings::isSchemaInstalled(CONTROL_CENTER_PERSONALISE_GSETTINGS_ID)) {
if(QGSettings::isSchemaInstalled(CONTROL_CENTER_PERSONALISE_GSETTINGS_ID)) {
m_trans_gsettings = new QGSettings(CONTROL_CENTER_PERSONALISE_GSETTINGS_ID, QByteArray(), this);
connect(m_trans_gsettings, &QGSettings::changed, this, [=](const QString& key) {
if (key == TRANSPARENCY_KEY) {
connect(m_trans_gsettings, &QGSettings::changed, this, [ = ](const QString & key) {
if(key == TRANSPARENCY_KEY) {
m_cache.remove(TRANSPARENCY_KEY);
m_cache.insert(TRANSPARENCY_KEY, m_trans_gsettings->get(TRANSPARENCY_KEY).toDouble());
qApp->paletteChanged(qApp->palette());
@ -71,49 +76,59 @@ GlobalSettings::GlobalSettings(QObject *parent) : QObject(parent)
m_cache.insert(TRANSPARENCY_KEY, m_trans_gsettings->get(TRANSPARENCY_KEY).toDouble());
}
if (QGSettings::isSchemaInstalled(THEME_GSETTINGS_ID)) {
m_cache.insert(STYLE_NAME_KEY, "ukui-light");
m_cache.insert(FONT_SIZE_KEY, 11);
if(QGSettings::isSchemaInstalled(THEME_GSETTINGS_ID)) {
m_theme_gsettings = new QGSettings(THEME_GSETTINGS_ID, QByteArray(), this);
connect(m_theme_gsettings, &QGSettings::changed, this, [=](const QString& key) {
if (key == STYLE_NAME_KEY) {
connect(m_theme_gsettings, &QGSettings::changed, this, [ = ](const QString & key) {
if(key == STYLE_NAME_KEY) {
//当前主题改变时也发出paletteChanged信号通知主界面刷新
qApp->paletteChanged(qApp->palette());
m_cache.remove(STYLE_NAME_KEY);
m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString());
} else if(key == FONT_SIZE_KEY) {
qApp->paletteChanged(qApp->palette());
m_cache.remove(FONT_SIZE_KEY);
m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble());
} else if (key == ICON_THEME_KEY) {
qApp->paletteChanged(qApp->palette());
}
});
m_cache.remove(STYLE_NAME_KEY);
m_cache.insert(STYLE_NAME_KEY, m_theme_gsettings->get(STYLE_NAME_KEY).toString());
m_cache.remove(FONT_SIZE_KEY);
m_cache.insert(FONT_SIZE_KEY, m_theme_gsettings->get(FONT_SIZE_KEY).toDouble());
}
}
/**
 * @brief Look up a setting in the in-memory cache.
 * @param key: name of the setting
 * @return the cached value, or a default-constructed QVariant when key is not cached
 */
const QVariant GlobalSettings::getValue(const QString &key) {
    return m_cache.value(key, QVariant());
}
bool GlobalSettings::isExist(const QString &key)
{
bool GlobalSettings::isExist(const QString &key) {
return !m_cache.value(key).isNull();
}
void GlobalSettings::reset(const QString &key)
{
void GlobalSettings::reset(const QString &key) {
m_cache.remove(key);
QtConcurrent::run([=]() {
QtConcurrent::run([ = ]() {
// if (m_mutex.tryLock(1000)) {
m_settings->remove(key);
m_settings->sync();
m_settings->remove(key);
m_settings->sync();
// m_mutex.unlock();
// }
});
Q_EMIT this->valueChanged(key);
}
void GlobalSettings::resetAll()
{
void GlobalSettings::resetAll() {
QStringList tmp = m_cache.keys();
m_cache.clear();
for (auto key : tmp) {
for(auto key : tmp) {
Q_EMIT this->valueChanged(key);
}
QtConcurrent::run([=]() {
if (m_mutex.tryLock(1000)) {
QtConcurrent::run([ = ]() {
if(m_mutex.tryLock(1000)) {
m_settings->clear();
m_settings->sync();
m_mutex.unlock();
@ -121,12 +136,9 @@ void GlobalSettings::resetAll()
});
}
bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool remove)
{
if(remove)
{
if(path.isEmpty())
{
bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool remove) {
if(remove) {
if(path.isEmpty()) {
returnCode = PATH_EMPTY;
return false;
}
@ -134,21 +146,18 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
m_block_dirs_settings->remove(path);
return true;
}
if(!path.startsWith("/home"))
{
if(!path.startsWith("/home")) {
// returnCode = QString(tr("I can only search your user directory, it doesn't make any sense if you block an directory which is not in user directory!"));
returnCode = PATH_NOT_IN_HOME;
return false;
}
//why QSetting's key can't start with "/"??
QString pathKey = path.right(path.length()-1);
QString pathKey = path.right(path.length() - 1);
QStringList blockDirs = m_block_dirs_settings->allKeys();
for(QString i:blockDirs)
{
if(pathKey.startsWith(i))
{
for(QString i : blockDirs) {
if(pathKey.startsWith(i)) {
// returnCode = QString(tr("My parent folder has been blocked!"));
returnCode = PATH_PARENT_BLOCKED;
return false;
@ -157,12 +166,11 @@ bool GlobalSettings::setBlockDirs(const QString &path, int &returnCode, bool rem
if(i.startsWith(pathKey))
m_block_dirs_settings->remove(i);
}
m_block_dirs_settings->setValue(pathKey,"0");
m_block_dirs_settings->setValue(pathKey, "0");
return true;
}
/**
 * @brief List the blocked directories.
 * @return every key currently stored in the block-dirs settings object
 */
QStringList GlobalSettings::getBlockDirs() {
    QStringList blockedKeys = m_block_dirs_settings->allKeys();
    return blockedKeys;
}
@ -186,19 +194,17 @@ QStringList GlobalSettings::getBlockDirs()
// }
//}
void GlobalSettings::setSearchRecord(const QString &word, const QDateTime &time)
{
void GlobalSettings::setSearchRecord(const QString &word, const QDateTime &time) {
QStringList keys = m_search_record_settings->allKeys();
if(keys.contains(QString(QUrl::toPercentEncoding(word))))
m_history.removeOne(word);
m_search_record_settings->setValue(QString(QUrl::toPercentEncoding(word)), time.toString("yyyy-MM-dd hh:mm:ss"));
m_search_record_settings->setValue(QString(QUrl::toPercentEncoding(word)), time.toString("yyyy-MM-dd hh:mm:ss"));
if(keys.size() >= 20)
m_search_record_settings->remove(QString(QUrl::toPercentEncoding(m_history.takeFirst())));
m_search_record_settings->remove(QString(QUrl::toPercentEncoding(m_history.takeFirst())));
m_history.append(word);
}
/**
 * @brief Return a copy of the in-memory search history list.
 */
QStringList GlobalSettings::getSearchRecord() {
    QStringList history(m_history);
    return history;
}
@ -249,32 +255,23 @@ QStringList GlobalSettings::getSearchRecord()
// return values;
//}
//here should be override
//MouseZhangZh
void GlobalSettings::setValue(const QString &key, const QVariant &value)
{
// qDebug()<<"setvalue========"<<key<<":"<<value;
//this method is designed for main process settings only!!
void GlobalSettings::setValue(const QString &key, const QVariant &value) {
m_cache.insert(key, value);
// m_settings->sync();
QtConcurrent::run([=]() {
// qDebug()<<m_settings->status();
// if (m_mutex.tryLock(1000)) {
// m_mutex.lock();
m_settings->setValue(key, value);
// qDebug()<<"setvalue========finish!!!"<<key<<":"<<value;
m_settings->sync();
// qDebug()<<"setvalue========sync!!!"<<key<<":"<<value;
// m_mutex.unlock();
// }
QtConcurrent::run([ = ]() {
if (m_mutex.tryLock(1000)) {
m_settings->setValue(key, value);
m_settings->sync();
m_mutex.unlock();
}
});
}
void GlobalSettings::forceSync(const QString &key)
{
void GlobalSettings::forceSync(const QString &key) {
m_settings->sync();
if (key.isNull()) {
if(key.isNull()) {
m_cache.clear();
for (auto key : m_settings->allKeys()) {
for(auto key : m_settings->allKeys()) {
m_cache.insert(key, m_settings->value(key));
}
} else {
@ -283,14 +280,11 @@ void GlobalSettings::forceSync(const QString &key)
}
}
void GlobalSettings::updateSearchHistory(QString key)
{
if(key == "search")
{
void GlobalSettings::updateSearchHistory(QString key) {
if(key == "search") {
m_search_record_settings->sync();
m_history.clear();
for(QString i:m_search_record_settings->allKeys())
{
for(QString i : m_search_record_settings->allKeys()) {
m_history.append(QUrl::fromPercentEncoding(i.toLocal8Bit()));
}
}

View File

@ -26,6 +26,7 @@
#include <QMutex>
#include <QVector>
#include <QDir>
#include <QFileSystemWatcher>
//#include <QGSettings>
//If use pkg_config, it wont build succes,why?????????
//My demo can build access yet.
@ -33,16 +34,17 @@
#include <QGSettings/QGSettings>
#include <QDBusConnection>
#include <QDBusInterface>
#include <QApplication>
#include "libsearch_global.h"
#define CONTROL_CENTER_PERSONALISE_GSETTINGS_ID "org.ukui.control-center.personalise"
#define TRANSPARENCY_KEY "transparency"
#define THEME_GSETTINGS_ID "org.ukui.style"
#define STYLE_NAME_KEY "styleName"
#define INDEX_DATABASE_STATE "index_database_state"
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
#define INDEX_GENERATOR_NORMAL_EXIT "index_generator_normal_exit"
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
#define FONT_SIZE_KEY "systemFontSize"
#define ICON_THEME_KEY "iconThemeName"
#define ENABLE_CREATE_INDEX_ASK_DIALOG "enable_create_index_ask_dialog"
#define WEB_ENGINE "web_engine"
#define PATH_EMPTY 1;
#define PATH_NOT_IN_HOME 2;
@ -54,8 +56,8 @@
//#define CLOUD_HISTORY "history"
//#define CLOUD_APPLICATIONS "applications"
class LIBSEARCH_EXPORT GlobalSettings : public QObject
{
namespace Zeeker {
class LIBSEARCH_EXPORT GlobalSettings : public QObject {
Q_OBJECT
public:
static GlobalSettings *getInstance();
@ -63,8 +65,8 @@ public:
bool isExist(const QString&);
Q_SIGNALS:
void valueChanged (const QString&);
void transparencyChanged (const double&);
void valueChanged(const QString&);
void transparencyChanged(const double&);
public Q_SLOTS:
void setValue(const QString&, const QVariant&);
@ -78,7 +80,7 @@ public Q_SLOTS:
* @param true to remove blocking,false to set blocking,default set false.
* @return
*/
bool setBlockDirs(const QString& path, int &returnCode,bool remove = false);
bool setBlockDirs(const QString& path, int &returnCode, bool remove = false);
QStringList getBlockDirs();
// void appendCloudData(const QString& key, const QString& value);
void setSearchRecord(const QString &word, const QDateTime &time);
@ -101,11 +103,14 @@ private:
QSettings *m_search_record_settings;
QMap<QString, QVariant> m_cache;
QStringList m_history;
QFileSystemWatcher *m_confWatcher;
QMutex m_mutex;
// size_t test = 0;
};
}
#endif // GLOBALSETTINGS_H

View File

@ -25,21 +25,20 @@
template<class T>
class gobjecttemplate
{
class gobjecttemplate {
public:
//do not use this constructor.
gobjecttemplate();
gobjecttemplate(T *obj, bool ref = false) {
m_obj = obj;
if (ref) {
if(ref) {
g_object_ref(obj);
}
}
~gobjecttemplate() {
//qDebug()<<"~GObjectTemplate";
if (m_obj)
if(m_obj)
g_object_unref(m_obj);
}
@ -55,4 +54,5 @@ private:
std::shared_ptr<gobjecttemplate<GFile>> wrapGFile(GFile *file);
std::shared_ptr<gobjecttemplate<GFileInfo>> wrapGFileInfo(GFileInfo *info);
#endif // GT_H

View File

@ -26,18 +26,16 @@
//extern QList<Document> *_doc_list_path;
//extern QMutex _mutex_doc_list_path;
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list)
{
using namespace Zeeker;
ConstructDocumentForPath::ConstructDocumentForPath(QVector<QString> list) {
this->setAutoDelete(true);
m_list = std::move(list);
}
void ConstructDocumentForPath::run()
{
void ConstructDocumentForPath::run() {
// qDebug()<<"ConstructDocumentForPath";
if (!_doc_list_path)
_doc_list_path = new QList<Document>;
if(!Zeeker::_doc_list_path)
Zeeker::_doc_list_path = new QList<Document>;
// qDebug()<<_doc_list_path->size();
QString index_text = m_list.at(0).toLower();
QString sourcePath = m_list.at(1);
@ -45,7 +43,7 @@ void ConstructDocumentForPath::run()
//多音字版
//现加入首字母
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".",""));
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(m_list.at(0)).replace(".", ""));
// if(!pinyin_text_list.isEmpty())
// {
// for (QString& i : pinyin_text_list){
@ -56,7 +54,7 @@ void ConstructDocumentForPath::run()
// }
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
// qDebug()<<"sourcePath"<<sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep);
// qDebug() << "sourcePath ---------------------------: " << sourcePath;
// qDebug() << "sourcePath.section -------------------: " << sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep);
@ -68,72 +66,66 @@ void ConstructDocumentForPath::run()
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(m_list.at(2));
/* QStringList temp;
// temp.append(index_text);
temp.append(pinyin_text_list)*/;
/* QStringList temp;
// temp.append(index_text);
temp.append(pinyin_text_list)*/;
int postingCount = 0;
while(postingCount < index_text.size())
{
while(postingCount < index_text.size()) {
// QVector<size_t> p;
// p.append(postingCount);
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(),postingCount);
doc.addPosting(QUrl::toPercentEncoding(index_text.at(postingCount)).toStdString(), postingCount);
++postingCount;
}
int i = 0;
for (QString& s : pinyin_text_list)
{
for(QString& s : pinyin_text_list) {
i = 0;
while(i < s.size())
{
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(),postingCount);
while(i < s.size()) {
doc.addPosting(QUrl::toPercentEncoding(s.at(i)).toStdString(), postingCount);
++postingCount;
++i;
}
}
// QMetaObject::invokeMethod(m_indexGenerator,"appendDocListPath",Q_ARG(Document,doc));
_mutex_doc_list_path.lock();
_doc_list_path->append(doc);
_mutex_doc_list_path.unlock();
Zeeker::_mutex_doc_list_path.lock();
Zeeker::_doc_list_path->append(doc);
Zeeker::_mutex_doc_list_path.unlock();
// qDebug()<<"ConstructDocumentForPath finish";
return;
}
/**
 * @brief Build a runnable that indexes the text content of one file.
 * @param path: path of the file; taken by value and moved into m_path
 *
 * Auto-delete is switched on for this runnable.
 */
ConstructDocumentForContent::ConstructDocumentForContent(QString path)
    : m_path(std::move(path)) {
    setAutoDelete(true);
}
void ConstructDocumentForContent::run()
{
void ConstructDocumentForContent::run() {
// qDebug() << "ConstructDocumentForContent currentThreadId()" << QThread::currentThreadId();
// 构造文本索引的document
if (!_doc_list_content)
_doc_list_content = new QList<Document>;
if(!Zeeker::_doc_list_content)
Zeeker::_doc_list_content = new QList<Document>;
QString content;
FileReader::getTextContent(m_path,content);
FileReader::getTextContent(m_path, content);
if(content.isEmpty())
return;
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString());
Document doc;
doc.setData(content);
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(m_path);
for(int i = 0;i<term.size();++i)
{
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
for(int i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
_mutex_doc_list_content.lock();
_doc_list_content->append(doc);
_mutex_doc_list_content.unlock();
Zeeker::_mutex_doc_list_content.lock();
Zeeker::_doc_list_content->append(doc);
Zeeker::_mutex_doc_list_content.unlock();
content.clear();
content.squeeze();
term.clear();

View File

@ -25,11 +25,11 @@
//#include <QMetaObject>
#include "document.h"
#include "index-generator.h"
//extern QList<Document> *_doc_list_path;
//extern QMutex _mutex_doc_list_path;
namespace Zeeker {
class IndexGenerator;
class ConstructDocumentForPath : public QRunnable
{
class ConstructDocumentForPath : public QRunnable {
public:
explicit ConstructDocumentForPath(QVector<QString> list);
~ConstructDocumentForPath() = default;
@ -39,8 +39,7 @@ private:
QVector<QString> m_list;
};
class ConstructDocumentForContent : public QRunnable
{
class ConstructDocumentForContent : public QRunnable {
public:
explicit ConstructDocumentForContent(QString path);
~ConstructDocumentForContent() = default;
@ -49,5 +48,6 @@ protected:
private:
QString m_path;
};
}
#endif // CONSTRUCTDOCUMENT_H

View File

@ -19,51 +19,44 @@
*/
#include "document.h"
#include <QDebug>
void Document::setData(QString data)
{
using namespace Zeeker;
void Document::setData(QString &data) {
if(data.isEmpty())
return;
m_document.set_data(data.toStdString());
}
void Document::addPosting(std::string term,QVector<size_t> offset, int weight)
{
void Document::addPosting(std::string term, QVector<size_t> offset, int weight) {
if(term == "")
return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
for(size_t i : offset)
{
m_document.add_posting(term,i,weight);
for(size_t i : offset) {
m_document.add_posting(term, i, weight);
}
}
void Document::addPosting(std::string term, unsigned int offset, int weight)
{
void Document::addPosting(std::string term, unsigned int offset, int weight) {
if(term == "")
return;
if(term.length() > 240)
term = QString::fromStdString(term).left(30).toStdString();
m_document.add_posting(term,offset,weight);
m_document.add_posting(term, offset, weight);
}
/**
 * @brief Add a term to the wrapped Xapian document.
 * @param term: the term; an empty string is ignored
 */
void Document::addTerm(QString term) {
    if(!term.isEmpty())
        m_document.add_term(term.toStdString());
}
/**
 * @brief Store value in value slot 1 of the wrapped Xapian document.
 * @param value: the text to store
 */
void Document::addValue(QString value) {
    const std::string stored = value.toStdString();
    m_document.add_value(1, stored);
}
void Document::setUniqueTerm(QString term)
{
void Document::setUniqueTerm(QString term) {
if(term.isEmpty())
return;
m_document.add_term(term.toStdString());
@ -71,26 +64,22 @@ void Document::setUniqueTerm(QString term)
// m_unique_term = new QString(term);
m_unique_term = std::move(term);
}
std::string Document::getUniqueTerm()
{
std::string Document::getUniqueTerm() {
// qDebug()<<"m_unique_term!"<<*m_unique_term;
// qDebug() << QString::fromStdString(m_unique_term.toStdString());
return m_unique_term.toStdString();
}
/**
 * @brief Replace the stored index text list.
 * @param indexText: the new list; taken by value and swapped into m_index_text
 */
void Document::setIndexText(QStringList indexText) {
    // The previous list ends up in the by-value parameter and is released
    // when the function returns.
    m_index_text.swap(indexText);
}
/**
 * @brief Return a copy of the stored index text list.
 */
QStringList Document::getIndexText() {
    QStringList indexText(m_index_text);
    return indexText;
}
/**
 * @brief Return the wrapped Xapian document by value.
 */
Xapian::Document Document::getXapianDocument() {
    Xapian::Document xapianDoc(m_document);
    return xapianDoc;
}

View File

@ -24,25 +24,24 @@
#include <QString>
#include <QStringList>
#include <QVector>
class Document
{
namespace Zeeker {
class Document {
public:
Document() = default;
~Document(){}
Document(const Document& other){
~Document() {}
Document(const Document& other) {
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
}
void operator=(const Document& other){
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
void operator=(const Document& other) {
m_document = other.m_document;
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
}
void setData(QString data);
void addPosting(std::string term, QVector<size_t> offset, int weight =1);
void addPosting(std::string term, unsigned int offset, int weight =1);
void setData(QString &data);
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
void addPosting(std::string term, unsigned int offset, int weight = 1);
void addTerm(QString term);
void addValue(QString value);
void setUniqueTerm(QString term);
@ -56,5 +55,6 @@ private:
QString m_unique_term;
};
}
#endif // DOCUMENT_H

View File

@ -20,49 +20,37 @@
#include "file-reader.h"
#include "file-utils.h"
#include "binary-parser.h"
// Constructs a FileReader; the body is empty and only forwards parent to QObject.
// NOTE(review): the constructor header appears twice with a using-directive in
// between (looks like diff residue) - confirm which lines belong in the file.
FileReader::FileReader(QObject *parent) : QObject(parent)
{
using namespace Zeeker;
FileReader::FileReader(QObject *parent) : QObject(parent) {
}
void FileReader::getTextContent(QString path, QString &textContent)
{
void FileReader::getTextContent(QString path, QString &textContent) {
QMimeType type = FileUtils::getMimetype(path);
QString name = type.name();
QFileInfo file(path);
QString strsfx = file.suffix();
if(name== "application/zip")
{
if(strsfx.endsWith( "docx"))
FileUtils::getDocxTextContent(path,textContent);
if(strsfx.endsWith( "pptx"))
FileUtils::getPptxTextContent(path,textContent);
if(strsfx.endsWith( "xlsx"))
FileUtils::getXlsxTextContent(path,textContent);
}
else if(name == "text/plain")
{
if(strsfx.endsWith( "txt"))
FileUtils::getTxtContent(path,textContent);
}
else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage")
{
if (strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
strsfx.endsWith("pps") ||strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls"))
{
if(name == "application/zip") {
if(strsfx.endsWith("docx"))
FileUtils::getDocxTextContent(path, textContent);
if(strsfx.endsWith("pptx"))
FileUtils::getPptxTextContent(path, textContent);
if(strsfx.endsWith("xlsx"))
FileUtils::getXlsxTextContent(path, textContent);
} else if(name == "text/plain") {
if(strsfx.endsWith("txt"))
FileUtils::getTxtContent(path, textContent);
} else if(type.inherits("application/msword") || type.name() == "application/x-ole-storage") {
if(strsfx.endsWith("doc") || strsfx.endsWith("dot") || strsfx.endsWith("wps") || strsfx.endsWith("ppt") ||
strsfx.endsWith("pps") || strsfx.endsWith("dps") || strsfx.endsWith("et") || strsfx.endsWith("xls")) {
KBinaryParser searchdata;
searchdata.RunParser(path,textContent);
searchdata.RunParser(path, textContent);
}
}
else if(name == "application/pdf")
{
if(strsfx.endsWith( "pdf"))
FileUtils::getPdfTextContent(path,textContent);
}
else
{
qWarning()<<"Unsupport format:["<<path<<"]["<<type.name()<<"]";
} else if(name == "application/pdf") {
if(strsfx.endsWith("pdf"))
FileUtils::getPdfTextContent(path, textContent);
} else {
qWarning() << "Unsupport format:[" << path << "][" << type.name() << "]";
}
return;

View File

@ -22,15 +22,14 @@
#include <QObject>
#include <QFileInfo>
// QObject-derived class whose only operation is the static getTextContent().
// NOTE(review): the class header and destructor each appear in two formatting
// variants (looks like diff residue) - confirm which lines belong in the file.
class FileReader : public QObject
{
namespace Zeeker {
class FileReader : public QObject {
Q_OBJECT
public:
explicit FileReader(QObject *parent = nullptr);
~FileReader()=default;
~FileReader() = default;
// Extracts text from the file at `path` into `textContent`. The implementation
// picks an extractor from the file's MIME type and suffix and logs a warning
// for MIME types it does not handle.
static void getTextContent(QString path, QString &textContent);
};
}
#endif // FILEREADER_H

View File

@ -32,30 +32,23 @@ size_t FileSearcher::uniqueSymbol3 = 0;
// Definitions of FileSearcher's static mutexes. In this file each one is locked
// around accesses to the matching uniqueSymbolN counter and the enqueue into
// the corresponding result queue (m_mutex3 guards the content-search results).
QMutex FileSearcher::m_mutex1;
QMutex FileSearcher::m_mutex2;
QMutex FileSearcher::m_mutex3;
// Constructs a FileSearcher; the body is empty and only forwards parent to QObject.
// NOTE(review): the header appears here in two brace styles (looks like diff
// residue) - confirm which line belongs in the file.
FileSearcher::FileSearcher(QObject *parent) : QObject(parent)
{
FileSearcher::FileSearcher(QObject *parent) : QObject(parent) {
}
// Destructor with an empty body; nothing is released here.
// NOTE(review): the header appears here in two brace styles (looks like diff
// residue) - confirm which line belongs in the file.
FileSearcher::~FileSearcher()
{
FileSearcher::~FileSearcher() {
}
// Opens the Xapian database at INDEX_PATH and returns its document count.
// If Xapian throws, the error description is logged as a warning and 0 is
// returned, so callers cannot tell "empty index" from "index could not be opened".
// NOTE(review): several lines appear in two formatting variants (looks like
// diff residue) - confirm which lines belong in the file.
int FileSearcher::getCurrentIndexCount()
{
try
{
int FileSearcher::getCurrentIndexCount() {
try {
Xapian::Database db(INDEX_PATH);
return db.get_doccount();
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return 0;
}
}
void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent)
{
void FileSearcher::onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, QQueue<QPair<QString, QStringList>> *searchResultContent) {
m_mutex1.lock();
++uniqueSymbol1;
m_mutex1.unlock();
@ -71,16 +64,15 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
m_search_result_content = searchResultContent;
//file
QtConcurrent::run([&, uniqueSymbol1, keyword](){
QtConcurrent::run([&, uniqueSymbol1, keyword]() {
if(!m_search_result_file->isEmpty())
m_search_result_file->clear();
int begin = 0;
int num = 5;
int resultCount = 0;
int total = 0;
while(total < 100)
{
resultCount = keywordSearchfile(uniqueSymbol1,keyword,"0",1,begin,num);
while(total < 100) {
resultCount = keywordSearchfile(uniqueSymbol1, keyword, "0", 1, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
@ -90,16 +82,15 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
});
// Q_EMIT this->resultFile(m_search_result_file);
//dir
QtConcurrent::run([&, uniqueSymbol2, keyword](){
QtConcurrent::run([&, uniqueSymbol2, keyword]() {
if(!m_search_result_dir->isEmpty())
m_search_result_dir->clear();
int begin = 0;
int num = 5;
int resultCount = 0;
int total = 0;
while(total<100)
{
resultCount = keywordSearchfile(uniqueSymbol2,keyword,"1",1,begin,num);
while(total < 100) {
resultCount = keywordSearchfile(uniqueSymbol2, keyword, "1", 1, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
@ -109,7 +100,7 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
});
// Q_EMIT this->resultDir(m_search_result_dir);
//content
QtConcurrent::run([&, uniqueSymbol3, keyword](){
QtConcurrent::run([&, uniqueSymbol3, keyword]() {
if(!m_search_result_content->isEmpty())
m_search_result_content->clear();
int begin = 0;
@ -117,9 +108,8 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
int resultCount = 0;
int total = 0;
while(total<50)
{
resultCount = keywordSearchContent(uniqueSymbol3,keyword,begin,num);
while(total < 50) {
resultCount = keywordSearchContent(uniqueSymbol3, keyword, begin, num);
if(resultCount == 0 || resultCount == -1)
break;
total += resultCount;
@ -130,54 +120,45 @@ void FileSearcher::onKeywordSearch(QString keyword,QQueue<QString> *searchResult
// Q_EMIT this->resultContent(m_search_result_content);
}
// Runs one page of a file-name search against the database at INDEX_PATH.
//
// uniqueSymbol  passed through unchanged to getResult()
// keyword       search text; turned into a query by creatQueryForFileSearch()
// value         when non-empty, only documents whose value in `slot` equals
//               this string are matched; when empty, the keyword query is used alone
// slot          Xapian value slot compared against `value`
// begin, num    offset of the first hit and maximum number of hits to fetch
//
// Returns the number of hits in this page, 0 when there are none, and -1 when
// getResult() returns -1 or a Xapian::Error is caught.
// NOTE(review): many lines appear in two formatting variants (looks like diff
// residue) - confirm which lines belong in the file.
int FileSearcher::keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num)
{
try
{
int FileSearcher::keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
try {
qDebug() << "--keywordSearchfile start--";
Xapian::Database db(INDEX_PATH);
Xapian::Query query = creatQueryForFileSearch(keyword,db);
Xapian::Query query = creatQueryForFileSearch(keyword, db);
Xapian::Enquire enquire(db);
Xapian::Query queryFile;
if(!value.isEmpty())
{
if(!value.isEmpty()) {
std::string slotValue = value.toStdString();
// A value range with identical lower and upper bounds: matches only documents
// whose slot value is exactly slotValue. It is AND-ed with the keyword query.
Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE,slot,slotValue,slotValue);
queryFile = Xapian::Query(Xapian::Query::OP_AND,query,queryValue);
}
else
{
Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, slotValue, slotValue);
queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue);
} else {
queryFile = query;
}
qDebug() << "keywordSearchfile:"<<QString::fromStdString(queryFile.get_description());
qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description());
enquire.set_query(queryFile);
Xapian::MSet result = enquire.get_mset(begin, num);
int resultCount = result.size();
qDebug()<< "keywordSearchfile results count=" <<resultCount;
qDebug() << "keywordSearchfile results count=" << resultCount;
// No hits in this page: return before calling getResult().
if(result.size() == 0)
return 0;
// getResult() returning -1 is propagated to the caller as -1.
if(getResult(uniqueSymbol, result, value) == -1)
return -1;
qDebug()<< "--keywordSearchfile finish--";
qDebug() << "--keywordSearchfile finish--";
return resultCount;
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
qDebug()<< "--keywordSearchfile finish--";
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchfile finish--";
return -1;
}
}
int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin, int num)
{
try
{
qDebug()<<"--keywordSearchContent search start--";
int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin, int num) {
try {
qDebug() << "--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH);
Xapian::Enquire enquire(db);
@ -188,8 +169,7 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword);
//Creat a query
std::string words;
for(int i=0;i<sKeyWord.size();i++)
{
for(int i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
}
Xapian::Query query = qp.parse_query(words);
@ -201,7 +181,7 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
// }
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug()<<"keywordSearchContent:"<<QString::fromStdString(query.get_description());
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
enquire.set_query(query);
@ -209,23 +189,20 @@ int FileSearcher::keywordSearchContent(size_t uniqueSymbol, QString keyword, int
int resultCount = result.size();
if(result.size() == 0)
return 0;
qDebug()<< "keywordSearchContent results count=" <<resultCount;
qDebug() << "keywordSearchContent results count=" << resultCount;
if(getContentResult(uniqueSymbol, result, words) == -1)
return -1;
qDebug()<< "--keywordSearchContent search finish--";
qDebug() << "--keywordSearchContent search finish--";
return resultCount;
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
qDebug()<< "--keywordSearchContent search finish--";
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchContent search finish--";
return -1;
}
}
Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Database &db)
{
Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Database &db) {
// Xapian::QueryParser qp;
// qp.set_default_op(Xapian::Query::OP_PHRASE);
// qp.set_database(db);
@ -241,12 +218,11 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
//Creat a query
// Xapian::Query queryPhrase = qp.parse_query(userInput.toStdString(),Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_SYNONYM);
std::vector<Xapian::Query> v;
for(int i=0;i<userInput.size();i++)
{
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
// Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
// Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
@ -254,20 +230,17 @@ Xapian::Query FileSearcher::creatQueryForFileSearch(QString keyword, Xapian::Dat
return queryPhrase;
}
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db)
{
// Placeholder for building a content-search query: no query construction is
// implemented, and neither `keyword` nor `db` is read.
//
// Returns a default-constructed Xapian::Query. The body was previously empty,
// so control flowed off the end of a value-returning function, which is
// undefined behaviour; an explicit return gives callers a well-defined
// (empty) query instead.
Xapian::Query FileSearcher::creatQueryForContentSearch(QString keyword, Xapian::Database &db) {
    return Xapian::Query();
}
int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value)
{
int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value) {
//QStringList *pathTobeDelete = new QStringList;
//Delete those path doc which is not already exist.
// QStringList searchResult = QStringList();
for (auto it = result.begin(); it != result.end(); ++it)
{
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
Xapian::weight docScoreWeight = it.get_weight();
@ -279,24 +252,17 @@ int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString v
QFileInfo info(path);
if(!info.exists())
{
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug()<<path<<"is not exist!!";
}
else
{
switch (value.toInt())
{
qDebug() << path << "is not exist!!";
} else {
switch(value.toInt()) {
case 1:
m_mutex1.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol1)
{
if(uniqueSymbol == FileSearcher::uniqueSymbol1) {
m_search_result_dir->enqueue(path);
m_mutex1.unlock();
}
else
{
} else {
m_mutex1.unlock();
return -1;
}
@ -304,13 +270,10 @@ int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString v
break;
case 0:
m_mutex2.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol2)
{
if(uniqueSymbol == FileSearcher::uniqueSymbol2) {
m_search_result_file->enqueue(path);
m_mutex2.unlock();
}
else
{
} else {
m_mutex2.unlock();
return -1;
}
@ -320,27 +283,25 @@ int FileSearcher::getResult(size_t uniqueSymbol, Xapian::MSet &result, QString v
}
// searchResult.append(path);
}
qDebug()<< "doc="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete)
return 0;
}
int FileSearcher::getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord)
{
int FileSearcher::getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord) {
//QStringList *pathTobeDelete = new QStringList;
//Delete those path doc which is not already exist.
QString wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0);
QString wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0);
int size = wordTobeFound.size();
int totalSize = QString::fromStdString(keyWord).size();
if(totalSize < 5)
totalSize = 5;
// QMap<QString,QStringList> searchResult;
for (auto it = result.begin(); it != result.end(); ++it)
{
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
double docScoreWeight = it.get_weight();
@ -352,50 +313,43 @@ int FileSearcher::getContentResult(size_t uniqueSymbol, Xapian::MSet &result, st
QFileInfo info(path);
if(!info.exists())
{
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug()<<path<<"is not exist!!";
qDebug() << path << "is not exist!!";
continue;
}
// Construct snippets containing keyword.
QStringList snippets;
auto term = doc.termlist_begin();
term.skip_to(wordTobeFound.toStdString());
int count =0;
for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos)
{
int count = 0;
for(auto pos = term.positionlist_begin(); pos != term.positionlist_end() && count < 6; ++pos) {
QByteArray snippetByte = QByteArray::fromStdString(data);
QString snippet = "..."+QString(snippetByte.left(*pos)).right(size +totalSize) + QString(snippetByte.mid(*pos,-1)).left(size+totalSize) + "...";
QString snippet = "..." + QString(snippetByte.left(*pos)).right(size + totalSize) + QString(snippetByte.mid(*pos, -1)).left(size + totalSize) + "...";
// qDebug()<<snippet;
snippets.append(snippet);
++count;
}
m_mutex3.lock();
if(uniqueSymbol == FileSearcher::uniqueSymbol3)
{
if(uniqueSymbol == FileSearcher::uniqueSymbol3) {
m_search_result_content->enqueue(qMakePair(path,snippets));
m_search_result_content->enqueue(qMakePair(path, snippets));
m_mutex3.unlock();
}
else
{
} else {
m_mutex3.unlock();
return -1;
}
// searchResult.insert(path,snippets);
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
qDebug() << "path=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete)
return 0;
}
bool FileSearcher::isBlocked(QString &path)
{
bool FileSearcher::isBlocked(QString &path) {
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
for(QString i : blockList)
{
for(QString i : blockList) {
if(path.startsWith(i.prepend("/")))
return true;
}

View File

@ -32,8 +32,7 @@
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
class FileSearcher : public QObject
{
class FileSearcher : public QObject {
Q_OBJECT
public:
explicit FileSearcher(QObject *parent = nullptr);
@ -49,14 +48,14 @@ public:
static QMutex m_mutex3;
public Q_SLOTS:
void onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent);
void onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, QQueue<QPair<QString, QStringList>> *searchResultContent);
Q_SIGNALS:
void resultFile(QQueue<QString> *);
void resultDir(QQueue<QString> *);
void resultContent(QQueue<QPair<QString,QStringList>> *);
void resultContent(QQueue<QPair<QString, QStringList>> *);
private:
int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
/**
@ -70,13 +69,13 @@ private:
Xapian::Query creatQueryForContentSearch(QString keyword, Xapian::Database &db);
int getResult(size_t uniqueSymbol, Xapian::MSet &result, QString value);
int getContentResult(size_t uniqueSymbol, Xapian::MSet &result,std::string &keyWord);
int getContentResult(size_t uniqueSymbol, Xapian::MSet &result, std::string &keyWord);
bool isBlocked(QString &path);
QQueue<QString> *m_search_result_file = nullptr;
QQueue<QString> *m_search_result_dir = nullptr;
QQueue<QPair<QString,QStringList>> *m_search_result_content = nullptr;
QQueue<QPair<QString, QStringList>> *m_search_result_content = nullptr;
bool m_searching = false;
};

View File

@ -24,39 +24,11 @@
// Allocates a new QQueue<QString>, assigns it to `a`, and logs the file, function,
// expression text and line of the allocation site.
// NOTE(review): the macro expands to two separate statements, so using it as the
// body of an unbraced if/else/loop would only guard the assignment.
#define NEW_QUEUE(a) a = new QQueue<QString>(); qDebug("---------------------------%s %s %s new at %d..",__FILE__,__FUNCTION__,#a,__LINE__);
//#define DELETE_QUEUE(a )
// FirstIndex constructors. Two forms are present in this span: one taking a path,
// which reads three status strings from GlobalSettings, derives the two database
// flags from them and allocates the work queues; and a parameterless one with an
// empty body.
// NOTE(review): this looks like the pre- and post-change constructor of a diff
// shown together - confirm which lines belong in the file.
FirstIndex::FirstIndex(const QString& path) : Traverse_BFS(path)
{
QString indexDataBaseStatus = GlobalSettings::getInstance()->getValue(INDEX_DATABASE_STATE).toString();
QString contentIndexDataBaseStatus = GlobalSettings::getInstance()->getValue(CONTENT_INDEX_DATABASE_STATE).toString();
QString inotifyIndexStatus = GlobalSettings::getInstance()->getValue(INOTIFY_NORMAL_EXIT).toString();
qDebug() << "indexDataBaseStatus: " << indexDataBaseStatus;
qDebug() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus;
qDebug() << "inotifyIndexStatus: " << inotifyIndexStatus;
/* || contentIndexDataBaseStatus == ""*/
// An empty index status string is treated as "database does not exist".
if (indexDataBaseStatus == ""){
this->bool_dataBaseExist = false;
}
else{
this->bool_dataBaseExist = true;
}
// The status is OK only when all three recorded values are "2".
if (indexDataBaseStatus != "2" || contentIndexDataBaseStatus != "2" || inotifyIndexStatus != "2"){
this->bool_dataBaseStatusOK = false;
}
else{
this->bool_dataBaseStatusOK = true;
}
// Queue of {name, path, dir-flag} entries filled by DoSomething().
this->q_index = new QQueue<QVector<QString>>();
//this->q_content_index = new QQueue<QString>();
NEW_QUEUE(this->q_content_index);
// this->mlm = new MessageListManager();
using namespace Zeeker;
FirstIndex::FirstIndex() {
}
FirstIndex::~FirstIndex()
{
FirstIndex::~FirstIndex() {
qDebug() << "~FirstIndex";
if(this->q_index)
delete this->q_index;
@ -64,33 +36,70 @@ FirstIndex::~FirstIndex()
if(this->q_content_index)
delete this->q_content_index;
this->q_content_index = nullptr;
if (this->p_indexGenerator)
if(this->p_indexGenerator)
delete this->p_indexGenerator;
this->p_indexGenerator = nullptr;
qDebug() << "~FirstIndex end";
// delete this->mlm;
// this->mlm = nullptr;
}
// Records one file-system entry for indexing.
// Always enqueues {file name, absolute path, flag} on q_index, where flag is "1"
// for a directory that is not a symlink and "0" otherwise. The absolute path is
// also enqueued on q_content_index when the name has more than one non-empty
// dot-separated part and its last part maps to true in targetFileTypeMap.
// NOTE(review): the header and the if-line appear in two formatting variants
// (looks like diff residue) - confirm which lines belong in the file.
void FirstIndex::DoSomething(const QFileInfo& fileInfo){
void FirstIndex::DoSomething(const QFileInfo& fileInfo) {
// qDebug() << "there are some shit here"<<fileInfo.fileName() << fileInfo.absoluteFilePath() << QString(fileInfo.isDir() ? "1" : "0");
this->q_index->enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
if ((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])){
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
this->q_content_index->enqueue(fileInfo.absoluteFilePath());
}
}
void FirstIndex::run(){
void FirstIndex::run() {
QTime t1 = QTime::currentTime();
// Create a fifo at ~/.config/org.ukui/ukui-search, the fifo is used to control the order of child processes' running.
QDir fifoDir = QDir(QDir::homePath() + "/.config/org.ukui/ukui-search");
if(!fifoDir.exists())
qDebug() << "create fifo path" << fifoDir.mkpath(fifoDir.absolutePath());
unlink(UKUI_SEARCH_PIPE_PATH);
int retval = mkfifo(UKUI_SEARCH_PIPE_PATH, 0777);
if(retval == -1) {
qCritical() << "creat fifo error!!";
syslog(LOG_ERR, "creat fifo error!!\n");
assert(false);
return;
}
qDebug() << "create fifo success\n";
QString indexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(INDEX_DATABASE_STATE).toString();
QString contentIndexDataBaseStatus = IndexStatusRecorder::getInstance()->getStatus(CONTENT_INDEX_DATABASE_STATE).toString();
QString inotifyIndexStatus = IndexStatusRecorder::getInstance()->getStatus(INOTIFY_NORMAL_EXIT).toString();
qDebug() << "indexDataBaseStatus: " << indexDataBaseStatus;
qDebug() << "contentIndexDataBaseStatus: " << contentIndexDataBaseStatus;
qDebug() << "inotifyIndexStatus: " << inotifyIndexStatus;
/* || contentIndexDataBaseStatus == ""*/
if(indexDataBaseStatus == "") {
this->bool_dataBaseExist = false;
} else {
this->bool_dataBaseExist = true;
}
if(indexDataBaseStatus != "2" || contentIndexDataBaseStatus != "2" || inotifyIndexStatus != "2") {
this->bool_dataBaseStatusOK = false;
} else {
this->bool_dataBaseStatusOK = true;
}
this->q_index = new QQueue<QVector<QString>>();
//this->q_content_index = new QQueue<QString>();
NEW_QUEUE(this->q_content_index);
// this->mlm = new MessageListManager();
int fifo_fd;
char buffer[2];
memset(buffer, 0, sizeof(buffer));
buffer[0] = 0x1;
buffer[1] = '\0';
fifo_fd = open(UKUI_SEARCH_PIPE_PATH, O_RDWR);
if(fifo_fd == -1)
{
if(fifo_fd == -1) {
perror("open fifo error\n");
assert(false);
}
@ -104,23 +113,20 @@ void FirstIndex::run(){
pid_t pid;
pid = fork();
if(pid == 0)
{
if(pid == 0) {
prctl(PR_SET_PDEATHSIG, SIGTERM);
prctl(PR_SET_NAME,"first-index");
if (this->bool_dataBaseExist){
if (this->bool_dataBaseStatusOK){
prctl(PR_SET_NAME, "first-index");
if(this->bool_dataBaseExist) {
if(this->bool_dataBaseStatusOK) {
::_exit(0);
}
else{
} else {
//if the parameter is false, index won't be rebuild
//if it is true, index will be rebuild
p_indexGenerator = IndexGenerator::getInstance(true,this);
p_indexGenerator = IndexGenerator::getInstance(true, this);
}
}
else{
} else {
// p_indexGenerator = IndexGenerator::getInstance(false,this);
p_indexGenerator = IndexGenerator::getInstance(true,this);
p_indexGenerator = IndexGenerator::getInstance(true, this);
}
QSemaphore sem(5);
@ -129,22 +135,22 @@ void FirstIndex::run(){
mutex2.lock();
mutex3.lock();
sem.acquire(4);
QtConcurrent::run([&](){
sem.acquire(1);
mutex1.unlock();
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->Traverse();
FileUtils::_max_index_count = this->q_index->length();
qDebug()<<"max_index_count:"<<FileUtils::_max_index_count;
sem.release(5);
});
QtConcurrent::run([&](){
// QtConcurrent::run([&](){
sem.acquire(1);
mutex1.unlock();
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->Traverse();
FileUtils::_max_index_count = this->q_index->length();
qDebug() << "max_index_count:" << FileUtils::_max_index_count;
sem.release(5);
// });
QtConcurrent::run([&]() {
sem.acquire(2);
mutex2.unlock();
qDebug() << "index start;";
QQueue<QVector<QString>>* tmp = new QQueue<QVector<QString>>();
while (!this->q_index->empty()) {
for (size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i){
while(!this->q_index->empty()) {
for(size_t i = 0; (i < 8192) && (!this->q_index->empty()); ++i) {
tmp->enqueue(this->q_index->dequeue());
}
this->p_indexGenerator->creatAllIndex(tmp);
@ -155,14 +161,14 @@ void FirstIndex::run(){
qDebug() << "index end;";
sem.release(2);
});
QtConcurrent::run([&](){
QtConcurrent::run([&]() {
sem.acquire(2);
mutex3.unlock();
QQueue<QString>* tmp = new QQueue<QString>();
qDebug()<<"q_content_index:"<<q_content_index->size();
while (!this->q_content_index->empty()) {
qDebug() << "q_content_index:" << q_content_index->size();
while(!this->q_content_index->empty()) {
// for (size_t i = 0; (i < this->u_send_length) && (!this->q_content_index->empty()); ++i){
for (size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i){
for(size_t i = 0; (i < 30) && (!this->q_content_index->empty()); ++i) {
tmp->enqueue(this->q_content_index->dequeue());
}
this->p_indexGenerator->creatAllIndex(tmp);
@ -180,33 +186,29 @@ void FirstIndex::run(){
mutex2.unlock();
mutex3.unlock();
if (this->q_index)
if(this->q_index)
delete this->q_index;
this->q_index = nullptr;
if (this->q_content_index)
if(this->q_content_index)
delete this->q_content_index;
this->q_content_index = nullptr;
if (p_indexGenerator)
if(p_indexGenerator)
delete p_indexGenerator;
p_indexGenerator = nullptr;
GlobalSettings::getInstance()->forceSync();
// GlobalSettings::getInstance()->forceSync();
IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "2");
IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "2");
::_exit(0);
}
else if(pid < 0)
{
qWarning()<<"First Index fork error!!";
}
else
{
waitpid(pid,NULL,0);
} else if(pid < 0) {
qWarning() << "First Index fork error!!";
} else {
waitpid(pid, NULL, 0);
--FileUtils::_index_status;
}
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "2");
int retval = write(fifo_fd, buffer, strlen(buffer));
if(retval == -1)
{
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
int retval1 = write(fifo_fd, buffer, strlen(buffer));
if(retval1 == -1) {
qWarning("write error\n");
}
qDebug("write data ok!\n");

View File

@ -34,17 +34,18 @@
#include <fcntl.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <syslog.h>
//#include <QtConcurrent>
#include "traverse_bfs.h"
#include "global-settings.h"
#include "index-status-recorder.h"
#include "index-generator.h"
#include "inotify-index.h"
#include "file-utils.h"
class FirstIndex : public QThread, public Traverse_BFS
{
#include "common.h"
namespace Zeeker {
class FirstIndex : public QThread, public Traverse_BFS {
public:
FirstIndex(const QString&);
FirstIndex();
~FirstIndex();
virtual void DoSomething(const QFileInfo &) final;
protected:
@ -82,5 +83,6 @@ private:
//xapian will auto commit per 10,000 changes, donnot change it!!!
const size_t u_send_length = 8192;
};
}
#endif // FIRSTINDEX_H

View File

@ -27,7 +27,6 @@
#include <QFile>
#include "file-utils.h"
#include "index-generator.h"
#include "global-settings.h"
#include "chinese-segmentation.h"
#include "construct-document.h"
#include <QStandardPaths>
@ -36,42 +35,41 @@
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
using namespace Zeeker;
// The single IndexGenerator handed out by IndexGenerator::getInstance().
static IndexGenerator *global_instance = nullptr;
// Locked by getInstance() and by the IndexGenerator destructor.
QMutex IndexGenerator::m_mutex;
// Document lists consumed by the two creatAllIndex() overloads (path index and
// content index), each with an accompanying mutex.
QList<Document> *_doc_list_path;
QMutex _mutex_doc_list_path;
QList<Document> *_doc_list_content;
QMutex _mutex_doc_list_content;
// Returns the process-wide IndexGenerator, creating it on the first call while
// holding m_mutex. `rebuild` and `parent` are only used when the instance is
// created; calls made after that return the existing instance and ignore them.
// NOTE(review): the header and several body lines appear in two formatting
// variants, and namespace-qualified global definitions sit between the two
// headers (looks like diff residue) - confirm which lines belong in the file.
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent)
{
QList<Document> *Zeeker::_doc_list_path;
QMutex Zeeker::_mutex_doc_list_path;
QList<Document> *Zeeker::_doc_list_content;
QMutex Zeeker::_mutex_doc_list_content;
IndexGenerator *IndexGenerator::getInstance(bool rebuild, QObject *parent) {
QMutexLocker locker(&m_mutex);
if (!global_instance) {
qDebug()<<"IndexGenerator=================";
global_instance = new IndexGenerator(rebuild,parent);
if(!global_instance) {
qDebug() << "IndexGenerator=================";
global_instance = new IndexGenerator(rebuild, parent);
}
qDebug() << "global_instance" << global_instance;
qDebug() << "QThread::currentThreadId()" << QThread::currentThreadId();
return global_instance;
}
// Stub: performs no work and always returns true.
// NOTE(review): the header appears here in two brace styles (looks like diff
// residue) - confirm which line belongs in the file.
bool IndexGenerator::setIndexdataPath()
{
bool IndexGenerator::setIndexdataPath() {
return true;
}
//文件名索引
bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
{
bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList) {
// FileUtils::_index_status |= 0x1;
// qDebug() << messageList->size();
HandlePathList(messageList);
if (_doc_list_path == NULL){
if(_doc_list_path == NULL) {
return false;
}
qDebug()<<"begin creatAllIndex";
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"0");
try
{
qDebug() << "begin creatAllIndex";
// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "0");
try {
// m_indexer = new Xapian::TermGenerator();
// m_indexer.set_database(*m_database_path);
//可以实现拼写纠正
@ -80,7 +78,7 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
// int count =0;
for (auto i : *_doc_list_path){
for(auto i : *_doc_list_path) {
insertIntoDatabase(i);
// if(++count > 8999){
@ -89,17 +87,15 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
// }
}
m_database_path->commit();
}
catch(const Xapian::Error &e)
{
qWarning()<<"creatAllIndex fail!"<<QString::fromStdString(e.get_description());
} catch(const Xapian::Error &e) {
qWarning() << "creatAllIndex fail!" << QString::fromStdString(e.get_description());
//need a record
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"1");
IndexStatusRecorder::getInstance()->setStatus(INDEX_DATABASE_STATE, "1");
// FileUtils::_index_status &= ~0x1;
assert(false);
}
GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE,"2");
qDebug()<<"finish creatAllIndex";
// GlobalSettings::getInstance()->setValue(INDEX_DATABASE_STATE, "2");
qDebug() << "finish creatAllIndex";
// FileUtils::_index_status &= ~0x1;
_doc_list_path->clear();
delete _doc_list_path;
@ -107,41 +103,36 @@ bool IndexGenerator::creatAllIndex(QQueue<QVector<QString> > *messageList)
return true;
}
//文件内容索引
bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
{
bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
// FileUtils::_index_status |= 0x2;
HandlePathList(messageList);
qDebug()<<"begin creatAllIndex for content";
if (_doc_list_content == NULL){
qDebug() << "begin creatAllIndex for content";
if(_doc_list_content == NULL) {
return false;
}
int size = _doc_list_content->size();
qDebug()<<"begin creatAllIndex for content"<<size;
if(!size == 0)
{
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"0");
try
{
int count =0;
for (auto i : *_doc_list_content){
qDebug() << "begin creatAllIndex for content" << size;
if(!size == 0) {
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "0");
try {
int count = 0;
for(auto i : *_doc_list_content) {
insertIntoContentDatabase(i);
if(++count > 999){
if(++count > 999) {
count = 0;
m_database_content->commit();
}
}
}
m_database_content->commit();
}
catch(const Xapian::Error &e)
{
qWarning()<<"creat content Index fail!"<<QString::fromStdString(e.get_description());
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"1");
} catch(const Xapian::Error &e) {
qWarning() << "creat content Index fail!" << QString::fromStdString(e.get_description());
IndexStatusRecorder::getInstance()->setStatus(CONTENT_INDEX_DATABASE_STATE, "1");
// FileUtils::_index_status &= ~0x2;
assert(false);
}
GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE,"2");
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
// FileUtils::_index_status &= ~0x2;
qDebug()<<"finish creatAllIndex for content";
qDebug() << "finish creatAllIndex for content";
_doc_list_content->clear();
delete _doc_list_content;
_doc_list_content = nullptr;
@ -151,36 +142,28 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList)
}
// Prepares the two on-disk index directories and opens both Xapian databases.
//
// For INDEX_PATH and then CONTENT_INDEX_PATH: if the directory exists and
// `rebuild` is true it is removed recursively; if it does not exist it is
// created with mkpath(). The result of each operation is only logged.
// Both databases are then opened with Xapian::DB_CREATE_OR_OPEN.
//
// NOTE(review): this span is a rendered diff; each statement appears in its
// pre-reformat and post-reformat spelling back to back.
IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent)
{
IndexGenerator::IndexGenerator(bool rebuild, QObject *parent) : QObject(parent) {
// File-name (path) index directory.
QDir database(QString::fromStdString(INDEX_PATH));
if(database.exists())
{
if(database.exists()) {
if(rebuild)
qDebug()<<"remove"<<database.removeRecursively();
}
else
{
qDebug()<<"create index path"<<database.mkpath(QString::fromStdString(INDEX_PATH));
qDebug() << "remove" << database.removeRecursively();
} else {
qDebug() << "create index path" << database.mkpath(QString::fromStdString(INDEX_PATH));
}
// Same handling for the file-content index directory.
database.setPath(QString::fromStdString(CONTENT_INDEX_PATH));
if(database.exists())
{
if(database.exists()) {
if(rebuild)
qDebug()<<"remove"<<database.removeRecursively();
}
else
{
qDebug()<<"create content index path"<<database.mkpath(QString::fromStdString(CONTENT_INDEX_PATH));
qDebug() << "remove" << database.removeRecursively();
} else {
qDebug() << "create content index path" << database.mkpath(QString::fromStdString(CONTENT_INDEX_PATH));
}
// Raw owning pointers; Xapian exceptions thrown here are not caught in this constructor.
m_database_path = new Xapian::WritableDatabase(INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
m_database_content = new Xapian::WritableDatabase(CONTENT_INDEX_PATH, Xapian::DB_CREATE_OR_OPEN);
}
IndexGenerator::~IndexGenerator()
{
IndexGenerator::~IndexGenerator() {
QMutexLocker locker(&m_mutex);
qDebug() << "~IndexGenerator";
if(m_database_path)
@ -216,8 +199,7 @@ IndexGenerator::~IndexGenerator()
qDebug() << "~IndexGenerator end";
}
void IndexGenerator::insertIntoDatabase(Document& doc)
{
void IndexGenerator::insertIntoDatabase(Document& doc) {
// qDebug()<< "--index start--";
Xapian::Document document = doc.getXapianDocument();
// m_indexer.set_document(document);
@ -229,24 +211,22 @@ void IndexGenerator::insertIntoDatabase(Document& doc)
// qDebug()<<"insert m_database_path:"<<QString::fromStdString(document.get_data());
// qDebug()<<"document:"<<QString::fromStdString(document.get_description());
Xapian::docid innerId= m_database_path->replace_document(doc.getUniqueTerm(),document);
Xapian::docid innerId = m_database_path->replace_document(doc.getUniqueTerm(), document);
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<< "--index finish--";
return;
}
//#define fun(a) a=new ;printf()
// Writes `doc` into the content database via replace_document(), keyed by the
// document's unique term. The returned docid is only used by the
// commented-out debug output. No commit is issued here; Xapian errors
// propagate to the caller.
// NOTE(review): rendered diff - the signature and the replace_document call
// appear in both their old and new formatting.
void IndexGenerator::insertIntoContentDatabase(Document& doc)
{
Xapian::docid innerId= m_database_content->replace_document(doc.getUniqueTerm(),doc.getXapianDocument());
void IndexGenerator::insertIntoContentDatabase(Document& doc) {
Xapian::docid innerId = m_database_content->replace_document(doc.getUniqueTerm(), doc.getXapianDocument());
// qDebug()<<"replace doc docid="<<static_cast<int>(innerId);
// qDebug()<< "--index finish--";
return;
}
void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
{
qDebug()<<"Begin HandlePathList!";
qDebug()<<messageList->size();
void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList) {
qDebug() << "Begin HandlePathList!";
qDebug() << messageList->size();
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
// QFuture<Document> future = QtConcurrent::mapped(*messageList,&IndexGenerator::GenerateDocument);
@ -259,12 +239,11 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
pool.setMaxThreadCount(((QThread::idealThreadCount() - 1) / 2) + 1);
pool.setExpiryTimeout(100);
ConstructDocumentForPath *constructer;
while(!messageList->isEmpty())
{
constructer = new ConstructDocumentForPath(messageList->dequeue());
pool.start(constructer);
while(!messageList->isEmpty()) {
constructer = new ConstructDocumentForPath(messageList->dequeue());
pool.start(constructer);
}
qDebug()<<"pool finish"<<pool.waitForDone(-1);
qDebug() << "pool finish" << pool.waitForDone(-1);
// if(constructer)
// delete constructer;
// constructer = nullptr;
@ -276,14 +255,13 @@ void IndexGenerator::HandlePathList(QQueue<QVector<QString>> *messageList)
// m_doc_list_path = std::move(future.results());
// qDebug()<<m_doc_list_path.size();
qDebug()<<"Finish HandlePathList!";
qDebug() << "Finish HandlePathList!";
return;
}
void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
{
qDebug()<<"Begin HandlePathList for content index!";
qDebug()<<messageList->size();
void IndexGenerator::HandlePathList(QQueue<QString> *messageList) {
qDebug() << "Begin HandlePathList for content index!";
qDebug() << messageList->size();
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
ChineseSegmentation::getInstance();
ConstructDocumentForContent *constructer;
@ -291,12 +269,11 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
// pool.setMaxThreadCount(((QThread::idealThreadCount() - 1) / 2) + 1);
pool.setMaxThreadCount(1);
pool.setExpiryTimeout(100);
while(!messageList->isEmpty())
{
constructer = new ConstructDocumentForContent(messageList->dequeue());
pool.start(constructer);
while(!messageList->isEmpty()) {
constructer = new ConstructDocumentForContent(messageList->dequeue());
pool.start(constructer);
}
qDebug()<<"pool finish"<<pool.waitForDone(-1);
qDebug() << "pool finish" << pool.waitForDone(-1);
// if(constructer)
// delete constructer;
// constructer = nullptr;
@ -316,19 +293,18 @@ void IndexGenerator::HandlePathList(QQueue<QString> *messageList)
// m_doc_list_content = std::move(future.results());
// future.cancel();
qDebug()<<"Finish HandlePathList for content index!";
qDebug() << "Finish HandlePathList for content index!";
return;
}
Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
{
Document IndexGenerator::GenerateDocument(const QVector<QString> &list) {
Document doc;
// qDebug()<<QString::number(quintptr(QThread::currentThreadId()));
//0-filename 1-filepathname 2-file or dir
QString index_text = list.at(0);
QString sourcePath = list.at(1);
index_text = index_text.replace(""," ");
QString sourcePath = list.at(1);
index_text = index_text.replace("", " ");
index_text = index_text.simplified();
//不带多音字版
@ -336,16 +312,16 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
//多音字版
//现加入首字母
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(list.at(0)).replace(".",""));
for (QString& i : pinyin_text_list){
QStringList pinyin_text_list = FileUtils::findMultiToneWords(QString(list.at(0)).replace(".", ""));
for(QString& i : pinyin_text_list) {
i.replace("", " ");
i = i.simplified();
}
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/",0,-2,QString::SectionIncludeLeadingSep)));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(sourcePath.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
// QString uniqueterm1 = QString::fromStdString(QCryptographicHash::hash(sourcePath.toUtf8(),QCryptographicHash::Md5).toStdString());
/*--------------------------------------------------------------------*/
/*--------------------------------------------------------------------*/
//QByteArray 和 QString 之间会进行隐式转换,造成字符串被截断等意想不到的后果!!!!!!! zpf
// if(uniqueterm1!=uniqueterm){
// qDebug()<<"-----------------------------------------start";
@ -353,7 +329,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
// qDebug()<<uniqueterm;
// qDebug()<<"------------------------------------------finish";
// }
/*--------------------------------------------------------------------*/
/*--------------------------------------------------------------------*/
doc.setData(sourcePath);
doc.setUniqueTerm(uniqueterm);
@ -367,8 +343,7 @@ Document IndexGenerator::GenerateDocument(const QVector<QString> &list)
}
Document IndexGenerator::GenerateContentDocument(const QString &path)
{
Document IndexGenerator::GenerateContentDocument(const QString &path) {
// 构造文本索引的document
QString content;
QStringList tmp;
@ -377,18 +352,17 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
Document doc;
QString uniqueterm;
QString upTerm;
FileReader::getTextContent(path,content);
FileReader::getTextContent(path, content);
term = ChineseSegmentation::getInstance()->callSegement(content);
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
// QStringList term = content.split("");
doc.setData(content);
doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm);
doc.addValue(path);
for(int i = 0;i<term.size();++i)
{
doc.addPosting(term.at(i).word,term.at(i).offsets,static_cast<int>(term.at(i).weight));
for(int i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
@ -408,8 +382,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
return doc;
}
bool IndexGenerator::isIndexdataExist()
{
bool IndexGenerator::isIndexdataExist() {
// Xapian::Database db(m_index_data_path->toStdString());
return true;
@ -417,12 +390,10 @@ bool IndexGenerator::isIndexdataExist()
}
QStringList IndexGenerator::IndexSearch(QString indexText)
{
QStringList IndexGenerator::IndexSearch(QString indexText) {
QStringList searchResult;
try
{
qDebug()<<"--search start--";
try {
qDebug() << "--search start--";
Xapian::Database db(INDEX_PATH);
Xapian::Enquire enquire(db);
@ -431,33 +402,32 @@ QStringList IndexGenerator::IndexSearch(QString indexText)
qp.set_database(db);
auto userInput = indexText;
std::string queryStr = indexText.replace(""," ").toStdString();
std::string queryStr = indexText.replace("", " ").toStdString();
// std::string s =db.get_spelling_suggestion(queryStr,10);
// qDebug()<<"spelling_suggestion!"<<QString::fromStdString(s);
qDebug()<<"queryStr!"<<QString::fromStdString(queryStr);
qDebug() << "queryStr!" << QString::fromStdString(queryStr);
//Creat a query
Xapian::Query queryPhrase = qp.parse_query(queryStr,Xapian::QueryParser::FLAG_PHRASE);
Xapian::Query queryPhrase = qp.parse_query(queryStr, Xapian::QueryParser::FLAG_PHRASE);
std::vector<Xapian::Query> v;
for(int i=0;i<userInput.size();i++)
{
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QString(userInput.at(i)).toStdString()));
qDebug()<<userInput.at(i);
qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
qDebug() << userInput.at(i);
qDebug() << QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
Xapian::Query queryNear =Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND,queryNear,queryPhrase);
Xapian::Query queryNear = Xapian::Query(Xapian::Query::OP_NEAR, v.begin(), v.end());
Xapian::Query query = Xapian::Query(Xapian::Query::OP_AND, queryNear, queryPhrase);
qDebug()<<QString::fromStdString(query.get_description());
qDebug() << QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(0, 9999);
qDebug()<< "find results count=" <<static_cast<int>(result.get_matches_estimated());
qDebug() << "find results count=" << static_cast<int>(result.get_matches_estimated());
// QStringList *pathTobeDelete = new QStringList;
//get search result
for (auto it = result.begin(); it != result.end(); ++it) {
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
Xapian::weight docScoreWeight = it.get_weight();
@ -465,27 +435,22 @@ QStringList IndexGenerator::IndexSearch(QString indexText)
// QFileInfo *info = new QFileInfo(QString::fromStdString(data));
QFileInfo info(QString::fromStdString(data));
if(!info.exists())
{
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug()<<QString::fromStdString(data)<<"is not exist!!";
}
else
{
qDebug() << QString::fromStdString(data) << "is not exist!!";
} else {
searchResult.append(QString::fromStdString(data));
}
qDebug()<< "doc="<< QString::fromStdString(data) << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
qDebug() << "doc=" << QString::fromStdString(data) << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// //Delete those path doc which is not already exist.
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete);
qDebug()<< "--search finish--";
}
catch(const Xapian::Error &e)
{
qDebug() <<QString::fromStdString(e.get_description());
qDebug() << "--search finish--";
} catch(const Xapian::Error &e) {
qDebug() << QString::fromStdString(e.get_description());
}
return searchResult;
}
@ -555,32 +520,27 @@ QStringList IndexGenerator::IndexSearch(QString indexText)
// }
//}
bool IndexGenerator::deleteAllIndex(QStringList *pathlist)
{
bool IndexGenerator::deleteAllIndex(QStringList *pathlist) {
QStringList *list = pathlist;
if(list->isEmpty())
return true;
for(int i = 0;i<list->size();i++)
{
for(int i = 0; i < list->size(); i++) {
QString doc = list->at(i);
std::string uniqueterm = FileUtils::makeDocUterm(doc);
try
{
qDebug()<<"--delete start--";
try {
qDebug() << "--delete start--";
m_database_path->delete_document(uniqueterm);
m_database_content->delete_document(uniqueterm);
qDebug()<<"delete path"<<doc;
qDebug()<<"delete md5"<<QString::fromStdString(uniqueterm);
qDebug() << "delete path" << doc;
qDebug() << "delete md5" << QString::fromStdString(uniqueterm);
m_database_path->commit();
m_database_content->commit();
qDebug()<< "--delete finish--";
qDebug() << "--delete finish--";
// qDebug()<<"m_database_path->get_lastdocid()!!!"<<m_database_path->get_lastdocid();
// qDebug()<<"m_database_path->get_doccount()!!!"<<m_database_path->get_doccount();
}
catch(const Xapian::Error &e)
{
qWarning()<<QString::fromStdString(e.get_description());
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return false;
}
}

View File

@ -29,19 +29,21 @@
#include <QMutex>
#include <QQueue>
//#include <QMetaObject>
#include "index-status-recorder.h"
#include "document.h"
#include "file-reader.h"
#include "common.h"
namespace Zeeker {
extern QList<Document> *_doc_list_path;
extern QMutex _mutex_doc_list_path;
extern QList<Document> *_doc_list_content;
extern QMutex _mutex_doc_list_content;
class IndexGenerator : public QObject
{
class IndexGenerator : public QObject {
Q_OBJECT
public:
static IndexGenerator *getInstance(bool rebuild = false,QObject *parent = nullptr);
static IndexGenerator *getInstance(bool rebuild = false, QObject *parent = nullptr);
~IndexGenerator();
bool setIndexdataPath();
bool isIndexdataExist();
@ -58,7 +60,7 @@ public Q_SLOTS:
bool deleteAllIndex(QStringList *pathlist);
private:
explicit IndexGenerator(bool rebuild = false,QObject *parent = nullptr);
explicit IndexGenerator(bool rebuild = false, QObject *parent = nullptr);
static QMutex m_mutex;
//For file name index
void HandlePathList(QQueue<QVector<QString> > *messageList);
@ -72,7 +74,7 @@ private:
// QList<Document> *m_doc_list_path; //for path index
// QList<Document> *m_doc_list_content; // for text content index
QMap<QString,QStringList> m_index_map;
QMap<QString, QStringList> m_index_map;
QString m_index_data_path;
Xapian::WritableDatabase* m_database_path;
Xapian::WritableDatabase* m_database_content;
@ -80,5 +82,6 @@ private:
std::string m_index_text_str;
Xapian::TermGenerator m_indexer;
};
}
#endif // INDEXGENERATOR_H

View File

@ -0,0 +1,28 @@
#include "index-status-recorder.h"
using namespace Zeeker;
/**
 * Returns the process-wide IndexStatusRecorder, creating it on first use.
 *
 * The instance is held in a function-local static pointer, so concurrent
 * first calls from different threads cannot construct two recorders or
 * observe a half-initialised pointer (C++11 guarantees thread-safe
 * initialisation of local statics). The object is intentionally never
 * destroyed, matching the previous behaviour.
 *
 * @return pointer to the shared recorder; never nullptr.
 */
IndexStatusRecorder *IndexStatusRecorder::getInstance()
{
    static IndexStatusRecorder *const instance = new IndexStatusRecorder;
    return instance;
}
void IndexStatusRecorder::setStatus(const QString &key, const QVariant &value)
{
m_status->setValue(key, value);
m_status->sync();
}
/**
 * Looks up a status entry.
 *
 * @param key name of the status entry.
 * @return whatever QSettings::value() yields for that key.
 */
const QVariant IndexStatusRecorder::getStatus(const QString &key)
{
    const QVariant stored = m_status->value(key);
    return stored;
}
/**
 * Opens the status store as an INI-format QSettings file at INDEX_STATUS.
 * The QSettings object is parented to this recorder.
 *
 * @param parent optional QObject parent.
 */
IndexStatusRecorder::IndexStatusRecorder(QObject *parent) : QObject(parent)
{
    const QString statusFilePath = INDEX_STATUS;
    m_status = new QSettings(statusFilePath, QSettings::IniFormat, this);
}

View File

@ -0,0 +1,27 @@
#ifndef INDEXSTATUSRECORDER_H
#define INDEXSTATUSRECORDER_H
#include <QObject>
#include <QSettings>
#include <QDir>
// Keys used in the index status file.
// The two *_DATABASE_STATE keys are set to "1" by the Xapian error handlers
// in IndexGenerator::creatAllIndex.
#define CONTENT_INDEX_DATABASE_STATE "content_index_database_state"
#define INDEX_DATABASE_STATE "index_database_state"
// Written by the inotify code: "0" before a batch of events is processed,
// "2" after it, "1" on a read/select error, "3" when watching stops because
// the search method is direct search.
#define INOTIFY_NORMAL_EXIT "inotify_normal_exit"
// Path of the status file. Parenthesised so the expansion stays a single
// operand wherever it is used (e.g. before a member access or next to
// another operator).
#define INDEX_STATUS (QDir::homePath() + "/.config/org.ukui/ukui-search/ukui-search-index-status.conf")
namespace Zeeker {
//fixme: we need a better way to record index status.
// Singleton key/value recorder for index status, backed by a QSettings
// object (m_status). Obtain it through getInstance(); the constructor is
// private.
class IndexStatusRecorder : public QObject
{
Q_OBJECT
public:
// Returns the shared recorder, creating it on first call.
static IndexStatusRecorder *getInstance();
// Stores `value` under `key` and syncs the underlying QSettings.
void setStatus(const QString& key, const QVariant& value);
// Returns the value stored under `key`, as reported by QSettings::value().
const QVariant getStatus(const QString& key);
private:
explicit IndexStatusRecorder(QObject *parent = nullptr);
// Settings store created in the constructor with this object as parent.
QSettings *m_status;
};
}
#endif // INDEXSTATUSRECORDER_H

View File

@ -6,8 +6,10 @@ HEADERS += \
$$PWD/file-reader.h \
$$PWD/first-index.h \
$$PWD/index-generator.h \
$$PWD/index-status-recorder.h \
$$PWD/inotify-index.h \
$$PWD/search-manager.h \
$$PWD/searchmethodmanager.h \
$$PWD/traverse_bfs.h \
$$PWD/ukui-search-qdbus.h
@ -17,8 +19,10 @@ SOURCES += \
$$PWD/file-reader.cpp \
$$PWD/first-index.cpp \
$$PWD/index-generator.cpp \
$$PWD/index-status-recorder.cpp \
$$PWD/inotify-index.cpp \
$$PWD/search-manager.cpp \
$$PWD/searchmethodmanager.cpp \
$$PWD/traverse_bfs.cpp \
$$PWD/ukui-search-qdbus.cpp

View File

@ -45,39 +45,32 @@
#define CREATE_FILE \
CREATE_FILE_NAME_INDEX \
CREATE_FILE_CONTENT_INDEX
// Constructs the inotify indexer: forwards `path` to Traverse_BFS and calls
// UkuiSearchQDBus::setInotifyMaxUserWatches().
//
// NOTE(review): this span is a rendered diff that interleaves two revisions.
// The `using namespace Zeeker;` line belongs to file scope, and the
// inotify_init()/AddWatch/setPath/firstTraverse statements at the end also
// appear at the top of run() - confirm which revision performs that setup.
InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path)
{
using namespace Zeeker;
InotifyIndex::InotifyIndex(const QString& path) : Traverse_BFS(path) {
qDebug() << "setInotifyMaxUserWatches start";
UkuiSearchQDBus usQDBus;
usQDBus.setInotifyMaxUserWatches();
qDebug() << "setInotifyMaxUserWatches end";
m_fd = inotify_init();
qDebug() << "m_fd----------->" <<m_fd;
this->AddWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->firstTraverse();
}
// Destructor: tears down the shared IndexGenerator and logs.
// NOTE(review): this invokes the IndexGenerator destructor explicitly on the
// object returned by getInstance() rather than deleting it, so the singleton
// pointer still refers to a destroyed object afterwards - confirm intended.
InotifyIndex::~InotifyIndex()
{
InotifyIndex::~InotifyIndex() {
IndexGenerator::getInstance()->~IndexGenerator();
qWarning() << "~InotifyIndex";
}
void InotifyIndex::firstTraverse(){
void InotifyIndex::firstTraverse() {
QQueue<QString> bfs;
bfs.enqueue(this->path);
QFileInfoList list;
QDir dir;
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
while (!bfs.empty()) {
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for (auto i : list){
if (i.isDir() && (!(i.isSymLink()))){
for(auto i : list) {
if(i.isDir() && (!(i.isSymLink()))) {
this->AddWatch(i.absoluteFilePath());
bfs.enqueue(i.absoluteFilePath());
}
@ -85,24 +78,24 @@ void InotifyIndex::firstTraverse(){
}
}
// Per-entry callback (declared `virtual ... final` in the class header):
// indexes one file-system entry.
//  - A non-symlink directory gets an inotify watch added.
//  - The entry is always submitted to the file-name index as
//    {fileName, absoluteFilePath, "1" for a non-symlink directory else "0"}.
//  - If the name has more than one non-empty '.'-separated part and its last
//    part maps to true in targetFileTypeMap, the path is also submitted to
//    the content index.
// NOTE(review): rendered diff - the signature and both `if` headers appear in
// old and new formatting.
void InotifyIndex::DoSomething(const QFileInfo& fileInfo){
void InotifyIndex::DoSomething(const QFileInfo& fileInfo) {
qDebug() << fileInfo.fileName() << "-------" << fileInfo.absoluteFilePath();
if(fileInfo.isDir() && (!fileInfo.isSymLink())){
if(fileInfo.isDir() && (!fileInfo.isSymLink())) {
this->AddWatch(fileInfo.absoluteFilePath());
}
// Single-element queue: creatAllIndex takes a queue of records.
QQueue<QVector<QString> > tempFile;
tempFile.enqueue(QVector<QString>() << fileInfo.fileName() << fileInfo.absoluteFilePath() << QString((fileInfo.isDir() && (!fileInfo.isSymLink())) ? "1" : "0"));
IndexGenerator::getInstance()->creatAllIndex(&tempFile);
// The suffix lookup uses operator[] on targetFileTypeMap.
if ((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])){
if((fileInfo.fileName().split(".", QString::SkipEmptyParts).length() > 1) && (true == targetFileTypeMap[fileInfo.fileName().split(".").last()])) {
QQueue<QString> tmp;
tmp.enqueue(fileInfo.absoluteFilePath());
IndexGenerator::getInstance()->creatAllIndex(&tmp);
}
}
bool InotifyIndex::AddWatch(const QString &path){
bool InotifyIndex::AddWatch(const QString &path) {
int ret = inotify_add_watch(m_fd, path.toStdString().c_str(), (IN_MOVED_FROM | IN_MOVED_TO | IN_CREATE | IN_DELETE | IN_MODIFY));
if (ret == -1) {
if(ret == -1) {
qDebug() << "AddWatch error:" << path;
return false;
}
@ -113,42 +106,65 @@ bool InotifyIndex::AddWatch(const QString &path){
return true;
}
bool InotifyIndex::RemoveWatch(const QString &path){
bool InotifyIndex::RemoveWatch(const QString &path, bool removeFromDatabase) {
int ret = inotify_rm_watch(m_fd, currentPath.key(path));
if (ret){
if(ret) {
qDebug() << "remove path error";
return false;
}
// Q_ASSERT(ret == 0);
assert(ret == 0);
for (QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();){
// qDebug() << i.value();
if (i.value().length() > path.length()){
// if (i.value().mid(0, path.length()) == path){
// if (path.startsWith(i.value())){
if (i.value().startsWith(path)){
qDebug() << "remove path: " << i.value();
ret = inotify_rm_watch(m_fd, currentPath.key(path));
if (ret){
qDebug() << "remove path error";
// return false;
if(removeFromDatabase) {
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
// qDebug() << i.value();
if(i.value().length() > path.length()) {
// if (i.value().mid(0, path.length()) == path){
// if (path.startsWith(i.value())){
if(i.value().startsWith(path)) {
qDebug() << "remove path: " << i.value();
ret = inotify_rm_watch(m_fd, currentPath.key(path));
if(ret) {
qDebug() << "remove path error";
// return false;
}
// assert(ret == 0);
/*--------------------------------*/
//在此调用删除索引
qDebug() << i.value();
IndexGenerator::getInstance()->deleteAllIndex(new QStringList(i.value()));
/*--------------------------------*/
currentPath.erase(i++);
// i++;
} else {
i++;
}
// assert(ret == 0);
/*--------------------------------*/
//在此调用删除索引
qDebug() << i.value();
IndexGenerator::getInstance()->deleteAllIndex(new QStringList(i.value()));
/*--------------------------------*/
currentPath.erase(i++);
// i++;
}
else{
} else {
i++;
}
}
else{
i++;
} else {
for(QMap<int, QString>::Iterator i = currentPath.begin(); i != currentPath.end();) {
// qDebug() << i.value();
if(i.value().length() > path.length()) {
// if (i.value().mid(0, path.length()) == path){
// if (path.startsWith(i.value())){
if(i.value().startsWith(path)) {
qDebug() << "remove path: " << i.value();
ret = inotify_rm_watch(m_fd, currentPath.key(path));
if(ret) {
qDebug() << "remove path error";
// return false;
}
// assert(ret == 0);
currentPath.erase(i++);
// i++;
} else {
i++;
}
} else {
i++;
}
}
}
// qDebug() << path;
@ -157,35 +173,37 @@ bool InotifyIndex::RemoveWatch(const QString &path){
return true;
}
void InotifyIndex::eventProcess(const char* buf, ssize_t tmp){
void InotifyIndex::eventProcess(const char* buf, ssize_t tmp) {
QQueue<QVector<QString>>* indexQueue = new QQueue<QVector<QString>>();
QQueue<QString>* contentIndexQueue = new QQueue<QString>();
ssize_t numRead = 0;
numRead = tmp;
char * p = const_cast<char*>(buf);
for (; p < buf + numRead;) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "0");
for(; p < buf + numRead;) {
struct inotify_event * event = reinterpret_cast<inotify_event *>(p);
qDebug() << "Read Event event->wd: " << event->wd;
qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
if(event->name[0] != '.'){
if(event->name[0] != '.') {
qDebug() << "Read Event event->wd: " << event->wd;
qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
qDebug() << QString(currentPath[event->wd] + '/' + event->name);
// switch (event->mask) {
if (event->mask & IN_CREATE){
if(event->mask & IN_CREATE) {
//Create top dir first, traverse it last.
qDebug() << "IN_CREATE";
CREATE_FILE
if (event->mask & IN_ISDIR){
if(event->mask & IN_ISDIR) {
TRAVERSE_DIR
}
goto next;
}
if ((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)){
if((event->mask & IN_DELETE) | (event->mask & IN_MOVED_FROM)) {
qDebug() << "IN_DELETE or IN_MOVED_FROM";
if (event->mask & IN_ISDIR){
if(event->mask & IN_ISDIR) {
RemoveWatch(currentPath[event->wd] + '/' + event->name);
}
//delete once more
@ -193,24 +211,23 @@ void InotifyIndex::eventProcess(const char* buf, ssize_t tmp){
goto next;
}
if (event->mask & IN_MODIFY){
if(event->mask & IN_MODIFY) {
qDebug() << "IN_MODIFY";
if (!(event->mask & IN_ISDIR)){
if(!(event->mask & IN_ISDIR)) {
// IndexGenerator::getInstance()->deleteAllIndex(new QStringList(currentPath[event->wd] + '/' + event->name));
CREATE_FILE
}
goto next;
}
if (event->mask & IN_MOVED_TO){
if(event->mask & IN_MOVED_TO) {
qDebug() << "IN_MOVED_TO";
if (event->mask & IN_ISDIR){
if(event->mask & IN_ISDIR) {
RemoveWatch(currentPath[event->wd] + '/' + event->name);
// IndexGenerator::getInstance()->deleteAllIndex(new QStringList(currentPath[event->wd] + '/' + event->name));
CREATE_FILE
TRAVERSE_DIR
}
else {
} else {
IndexGenerator::getInstance()->deleteAllIndex(new QStringList(currentPath[event->wd] + '/' + event->name));
CREATE_FILE
}
@ -220,26 +237,31 @@ void InotifyIndex::eventProcess(const char* buf, ssize_t tmp){
next:
p += sizeof(struct inotify_event) + event->len;
}
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "2");
delete indexQueue;
indexQueue = nullptr;
delete contentIndexQueue;
contentIndexQueue = nullptr;
}
void InotifyIndex::run(){
void InotifyIndex::run() {
m_fd = inotify_init();
qDebug() << "m_fd----------->" << m_fd;
this->AddWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->firstTraverse();
int fifo_fd;
char buffer[2];
memset(buffer, 0, sizeof(buffer));
fifo_fd = open(UKUI_SEARCH_PIPE_PATH, O_RDWR);
if(fifo_fd == -1)
{
if(fifo_fd == -1) {
perror("open fifo error\n");
assert(false);
}
int retval = read(fifo_fd, buffer, sizeof(buffer));
if(retval == -1)
{
if(retval == -1) {
perror("read error\n");
assert(false);
}
@ -247,7 +269,7 @@ void InotifyIndex::run(){
printf("read data ok\n");
close(fifo_fd);
if (buffer[0] & 0x1){
if(buffer[0] & 0x1) {
printf("data confirmed\n");
}
unlink(UKUI_SEARCH_PIPE_PATH);
@ -256,50 +278,48 @@ void InotifyIndex::run(){
ssize_t numRead;
for (;;) { /* Read events forever */
read:
while(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
// for (;;) { /* Read events forever */
memset(buf, 0x00, BUF_LEN);
numRead = read(m_fd, buf, BUF_LEN);
if (numRead == -1){
if(numRead == -1) {
printf("\033[1;31;40mread event error\033[0m\n");
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "1");
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
fflush(stdout);
assert(false);
}
//TODO: Merge multiple signals.
char * tmp = const_cast<char*>(buf);
for (; tmp < buf + numRead;) {
for(; tmp < buf + numRead;) {
struct inotify_event * event = reinterpret_cast<inotify_event *>(tmp);
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
if(event->name[0] != '.'){
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "0");
goto fork;
if(event->name[0] != '.') {
// qDebug() << "Read Event: " << currentPath[event->wd] << QString(event->name) << event->cookie << event->wd << event->mask;
// qDebug("mask:0x%x,",event->mask);
break;
}
tmp += sizeof(struct inotify_event) + event->len;
}
goto read;
if(tmp >= buf + numRead) {
continue;
}
fork:
++FileUtils::_index_status;
pid_t pid;
pid = fork();
if(pid == 0)
{
if(pid == 0) {
prctl(PR_SET_PDEATHSIG, SIGTERM);
prctl(PR_SET_NAME,"inotify-index");
if (numRead == 0) {
prctl(PR_SET_NAME, "inotify-index");
if(numRead == 0) {
qDebug() << "read() from inotify fd returned 0!";
}
if (numRead == -1) {
if(numRead == -1) {
qDebug() << "read";
}
eventProcess(buf, numRead);
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "2");
fd_set read_fds;
int rc;
@ -307,48 +327,60 @@ fork:
read_timeout->tv_sec = 40;
read_timeout->tv_usec = 0;
for(;;)
{
for(;;) {
FD_ZERO(&read_fds);
FD_SET(m_fd, &read_fds);
qDebug() << read_timeout->tv_sec;
rc = select(m_fd + 1, &read_fds, NULL, NULL, read_timeout);
if ( rc < 0 ) {
if(rc < 0) {
// error
qWarning() << "select result < 0, error!";
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "1");
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "1");
assert(false);
}
else if ( rc == 0 ) {
} else if(rc == 0) {
qDebug() << "select timeout!";
::free(read_timeout);
IndexGenerator::getInstance()->~IndexGenerator();
GlobalSettings::getInstance()->forceSync();
::_exit(0);
}else{
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "0");
// GlobalSettings::getInstance()->forceSync();
::_exit(0);
} else {
memset(buf, 0x00, BUF_LEN);
numRead = read(m_fd, buf, BUF_LEN);
if (numRead == -1){
if(numRead == -1) {
printf("\033[1;31;40mread event error\033[0m\n");
fflush(stdout);
assert(false);
}
//TODO: Merge multiple signals.
char * tmp = const_cast<char*>(buf);
for(; tmp < buf + numRead; ) {
struct inotify_event * event = reinterpret_cast<inotify_event *>(tmp);
if(event->name[0] != '.') {
break;
}
tmp += sizeof(struct inotify_event) + event->len;
}
if(tmp >= buf + numRead) {
continue;
}
qDebug() << "Read " << numRead << " bytes from inotify fd";
this->eventProcess(buf, numRead);
GlobalSettings::getInstance()->setValue(INOTIFY_NORMAL_EXIT, "2");
}
}
}
else if (pid > 0){
} else if(pid > 0) {
memset(buf, 0x00, BUF_LEN);
waitpid(pid, NULL, 0);
--FileUtils::_index_status;
}
else{
} else {
assert(false);
}
}
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
RemoveWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
}
}

View File

@ -27,19 +27,21 @@
#include "index-generator.h"
#include "traverse_bfs.h"
#include "ukui-search-qdbus.h"
#include "global-settings.h"
#include "index-status-recorder.h"
#include "file-utils.h"
#include "first-index.h"
#include "common.h"
#define BUF_LEN 1024000
namespace Zeeker {
class InotifyIndex;
static InotifyIndex* global_instance_of_index = nullptr;
class InotifyIndex : public QThread, public Traverse_BFS
{
class InotifyIndex : public QThread, public Traverse_BFS {
Q_OBJECT
public:
static InotifyIndex* getInstance(const QString& path){
if (!global_instance_of_index) {
static InotifyIndex* getInstance(const QString& path) {
if(!global_instance_of_index) {
global_instance_of_index = new InotifyIndex(path);
}
return global_instance_of_index;
@ -48,7 +50,7 @@ public:
~InotifyIndex();
bool AddWatch(const QString&);
bool RemoveWatch(const QString&);
bool RemoveWatch(const QString&, bool removeFromDatabase = true);
virtual void DoSomething(const QFileInfo &) final;
void eventProcess(const char*, ssize_t);
@ -56,7 +58,6 @@ public:
protected:
void run() override;
private:
QString* m_watch_path;
int m_fd;
QMap<int, QString> currentPath;
@ -78,5 +79,6 @@ private:
};
};
}
#endif // INOTIFYINDEX_H

View File

@ -17,48 +17,35 @@
* Authors: zhangpengfei <zhangpengfei@kylinos.cn>
*
*/
#include <QFileInfo>
#include <QDebug>
#include <QtConcurrent>
#include <QThread>
#include <QUrl>
#include "search-manager.h"
#include "global-settings.h"
#include "chinese-segmentation.h"
using namespace Zeeker;
size_t SearchManager::uniqueSymbol1 = 0;
size_t SearchManager::uniqueSymbol2 = 0;
size_t SearchManager::uniqueSymbol3 = 0;
QMutex SearchManager::m_mutex1;
QMutex SearchManager::m_mutex2;
QMutex SearchManager::m_mutex3;
SearchManager::SearchManager(QObject *parent) : QObject(parent)
{
SearchManager::SearchManager(QObject *parent) : QObject(parent) {
m_pool.setMaxThreadCount(2);
m_pool.setExpiryTimeout(1000);
}
SearchManager::~SearchManager()
{
SearchManager::~SearchManager() {
}
int SearchManager::getCurrentIndexCount()
{
try
{
int SearchManager::getCurrentIndexCount() {
try {
Xapian::Database db(INDEX_PATH);
return db.get_doccount();
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
return 0;
}
}
void SearchManager::onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,
QQueue<QPair<QString,QStringList>> *searchResultContent)
{
void SearchManager::onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir,
QQueue<QPair<QString, QStringList>> *searchResultContent) {
m_mutex1.lock();
++uniqueSymbol1;
m_mutex1.unlock();
@ -69,25 +56,31 @@ void SearchManager::onKeywordSearch(QString keyword,QQueue<QString> *searchResul
++uniqueSymbol3;
m_mutex3.unlock();
FileSearch *filesearch;
filesearch = new FileSearch(searchResultFile,uniqueSymbol1,keyword,"0",1,0,5);
m_pool.start(filesearch);
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
DirectSearch *directSearch;
directSearch = new DirectSearch(keyword, searchResultFile, searchResultDir, uniqueSymbol1);
m_pool.start(directSearch);
} else if(FileUtils::SearchMethod::INDEXSEARCH == FileUtils::searchMethod) {
FileSearch *filesearch;
filesearch = new FileSearch(searchResultFile, uniqueSymbol1, keyword, "0", 1, 0, 5);
m_pool.start(filesearch);
FileSearch *dirsearch;
dirsearch = new FileSearch(searchResultDir,uniqueSymbol2,keyword,"1",1,0,5);
m_pool.start(dirsearch);
FileSearch *dirsearch;
dirsearch = new FileSearch(searchResultDir, uniqueSymbol2, keyword, "1", 1, 0, 5);
m_pool.start(dirsearch);
FileContentSearch *contentSearch;
contentSearch = new FileContentSearch(searchResultContent,uniqueSymbol3,keyword,0,5);
m_pool.start(contentSearch);
FileContentSearch *contentSearch;
contentSearch = new FileContentSearch(searchResultContent, uniqueSymbol3, keyword, 0, 5);
m_pool.start(contentSearch);
} else {
qWarning() << "Unknown search method! FileUtils::searchMethod: " << static_cast<int>(FileUtils::searchMethod);
}
return;
}
bool SearchManager::isBlocked(QString &path)
{
bool SearchManager::isBlocked(QString &path) {
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
for(QString i : blockList)
{
for(QString i : blockList) {
if(path.startsWith(i.prepend("/")))
return true;
}
@ -95,8 +88,7 @@ bool SearchManager::isBlocked(QString &path)
}
FileSearch::FileSearch(QQueue<QString> *searchResult,size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num)
{
FileSearch::FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot, int begin, int num) {
this->setAutoDelete(true);
m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol;
@ -107,19 +99,17 @@ FileSearch::FileSearch(QQueue<QString> *searchResult,size_t uniqueSymbol, QStrin
m_num = num;
}
FileSearch::~FileSearch()
{
FileSearch::~FileSearch() {
m_search_result = nullptr;
}
void FileSearch::run()
{
if(!m_search_result->isEmpty())
void FileSearch::run() {
if(!m_search_result->isEmpty()) {
m_search_result->clear();
}
int resultCount = 0;
int total = 0;
while(total < 100)
{
while(total < 100) {
resultCount = keywordSearchfile();
if(resultCount == 0 || resultCount == -1)
break;
@ -129,66 +119,55 @@ void FileSearch::run()
return;
}
int FileSearch::keywordSearchfile()
{
try
{
int FileSearch::keywordSearchfile() {
try {
qDebug() << "--keywordSearchfile start--";
Xapian::Database db(INDEX_PATH);
Xapian::Query query = creatQueryForFileSearch(db);
Xapian::Enquire enquire(db);
Xapian::Query queryFile;
if(!m_value.isEmpty())
{
if(!m_value.isEmpty()) {
std::string slotValue = m_value.toStdString();
Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE,m_slot,slotValue,slotValue);
queryFile = Xapian::Query(Xapian::Query::OP_AND,query,queryValue);
}
else
{
Xapian::Query queryValue = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, m_slot, slotValue, slotValue);
queryFile = Xapian::Query(Xapian::Query::OP_AND, query, queryValue);
} else {
queryFile = query;
}
qDebug() << "keywordSearchfile:"<<QString::fromStdString(queryFile.get_description());
qDebug() << "keywordSearchfile:" << QString::fromStdString(queryFile.get_description());
enquire.set_query(queryFile);
Xapian::MSet result = enquire.get_mset(m_begin, m_num);
int resultCount = result.size();
qDebug()<< "keywordSearchfile results count=" <<resultCount;
qDebug() << "keywordSearchfile results count=" << resultCount;
if(resultCount == 0)
return 0;
if(getResult(result) == -1)
return -1;
qDebug()<< "--keywordSearchfile finish--";
qDebug() << "--keywordSearchfile finish--";
return resultCount;
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
qDebug()<< "--keywordSearchfile finish--";
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchfile finish--";
return -1;
}
}
Xapian::Query FileSearch::creatQueryForFileSearch(Xapian::Database &db)
{
Xapian::Query FileSearch::creatQueryForFileSearch(Xapian::Database &db) {
auto userInput = m_keyword.toLower();
std::vector<Xapian::Query> v;
for(int i=0;i<userInput.size();i++)
{
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
}
Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
return queryPhrase;
}
int FileSearch::getResult(Xapian::MSet &result)
{
for (auto it = result.begin(); it != result.end(); ++it)
{
int FileSearch::getResult(Xapian::MSet &result) {
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
Xapian::weight docScoreWeight = it.get_weight();
@ -196,29 +175,23 @@ int FileSearch::getResult(Xapian::MSet &result)
QString path = QString::fromStdString(data);
std::string().swap(data);
if(SearchManager::isBlocked(path))
if(SearchManager::isBlocked(path)) {
continue;
}
QFileInfo info(path);
if(!info.exists())
{
// pathTobeDelete->append(QString::fromStdString(data));
qDebug()<<path<<"is not exist!!";
}
else
{
switch (m_value.toInt())
{
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug() << path << "is not exist!!";
} else {
switch(m_value.toInt()) {
case 1:
SearchManager::m_mutex1.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol2)
{
if(m_uniqueSymbol == SearchManager::uniqueSymbol2) {
m_search_result->enqueue(path);
SearchManager::m_mutex1.unlock();
}
else
{
} else {
SearchManager::m_mutex1.unlock();
return -1;
}
@ -226,13 +199,10 @@ int FileSearch::getResult(Xapian::MSet &result)
break;
case 0:
SearchManager::m_mutex2.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol1)
{
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
m_search_result->enqueue(path);
SearchManager::m_mutex2.unlock();
}
else
{
} else {
SearchManager::m_mutex2.unlock();
return -1;
}
@ -242,15 +212,14 @@ int FileSearch::getResult(Xapian::MSet &result)
}
// searchResult.append(path);
}
qDebug()<< "doc="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
qDebug() << "doc=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// if(!pathTobeDelete->isEmpty())
// deleteAllIndex(pathTobeDelete)
return 0;
}
FileContentSearch::FileContentSearch(QQueue<QPair<QString,QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num)
{
FileContentSearch::FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin, int num) {
this->setAutoDelete(true);
m_search_result = searchResult;
m_uniqueSymbol = uniqueSymbol;
@ -259,59 +228,55 @@ FileContentSearch::FileContentSearch(QQueue<QPair<QString,QStringList>> *searchR
m_num = num;
}
FileContentSearch::~FileContentSearch()
{
FileContentSearch::~FileContentSearch() {
m_search_result = nullptr;
}
void FileContentSearch::run()
{
if(!m_search_result->isEmpty())
void FileContentSearch::run() {
if(!m_search_result->isEmpty()) {
m_search_result->clear();
}
int resultCount = 0;
int total = 0;
while(total<50)
{
while(total < 50) {
resultCount = keywordSearchContent();
if(resultCount == 0 || resultCount == -1)
if(resultCount == 0 || resultCount == -1) {
break;
}
total += resultCount;
m_begin += m_num;
}
return;
}
int FileContentSearch::keywordSearchContent()
{
try
{
qDebug()<<"--keywordSearchContent search start--";
int FileContentSearch::keywordSearchContent() {
try {
qDebug() << "--keywordSearchContent search start--";
Xapian::Database db(CONTENT_INDEX_PATH);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
/*
::friso::ResultMap ret;
::friso::FrisoSegmentation::getInstance()->callSegement(ret, keyword.toLocal8Bit().data());
for (::friso::ResultMap::iterator it_map = ret.begin(); it_map != ret.end(); ++it_map){
target_str += it_map->first;
target_str += " ";
it_map->second.first.clear();
::std::vector<size_t>().swap(it_map->second.first);
}
/*
::friso::ResultMap ret;
::friso::FrisoSegmentation::getInstance()->callSegement(ret, keyword.toLocal8Bit().data());
for (::friso::ResultMap::iterator it_map = ret.begin(); it_map != ret.end(); ++it_map){
target_str += it_map->first;
target_str += " ";
it_map->second.first.clear();
::std::vector<size_t>().swap(it_map->second.first);
}
ret.clear();
ret.erase(ret.begin(), ret.end());
::friso::ResultMap().swap(ret);
*/
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
ret.clear();
ret.erase(ret.begin(), ret.end());
::friso::ResultMap().swap(ret);
*/
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
//Creat a query
std::string words;
for(int i=0;i<sKeyWord.size();i++)
{
for(int i = 0; i < sKeyWord.size(); i++) {
words.append(sKeyWord.at(i).word).append(" ");
}
@ -338,49 +303,47 @@ int FileContentSearch::keywordSearchContent()
// qDebug()<<QString::fromStdString(sKeyWord.at(i).word);
// }
// Xapian::Query queryPhrase =Xapian::Query(Xapian::Query::OP_AND, v.begin(), v.end());
qDebug()<<"keywordSearchContent:"<<QString::fromStdString(query.get_description());
qDebug() << "keywordSearchContent:" << QString::fromStdString(query.get_description());
enquire.set_query(query);
Xapian::MSet result = enquire.get_mset(m_begin, m_num);
int resultCount = result.size();
if(result.size() == 0)
if(result.size() == 0) {
return 0;
qDebug()<< "keywordSearchContent results count=" <<resultCount;
}
qDebug() << "keywordSearchContent results count=" << resultCount;
if(getResult(result,words) == -1)
if(getResult(result, words) == -1) {
return -1;
}
qDebug()<< "--keywordSearchContent search finish--";
qDebug() << "--keywordSearchContent search finish--";
return resultCount;
}
catch(const Xapian::Error &e)
{
qWarning() <<QString::fromStdString(e.get_description());
qDebug()<< "--keywordSearchContent search finish--";
} catch(const Xapian::Error &e) {
qWarning() << QString::fromStdString(e.get_description());
qDebug() << "--keywordSearchContent search finish--";
return -1;
}
}
int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord)
{
for (auto it = result.begin(); it != result.end(); ++it)
{
int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) {
for(auto it = result.begin(); it != result.end(); ++it) {
Xapian::Document doc = it.get_document();
std::string data = doc.get_data();
double docScoreWeight = it.get_weight();
Xapian::percent docScorePercent = it.get_percent();
QString path = QString::fromStdString(doc.get_value(1));
if(SearchManager::isBlocked(path))
if(SearchManager::isBlocked(path)) {
continue;
}
QFileInfo info(path);
if(!info.exists())
{
if(!info.exists()) {
// pathTobeDelete->append(QString::fromStdString(data));
qDebug()<<path<<"is not exist!!";
qDebug() << path << "is not exist!!";
continue;
}
// Construct snippets containing keyword.
@ -388,18 +351,18 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord)
// snippets.append(QString::fromStdString( result.snippet(doc.get_data(),400)));
// qWarning()<<QString::fromStdString(s);
auto term = doc.termlist_begin();
std::string wordTobeFound = QString::fromStdString(keyWord).section(" ",0,0).toStdString();
std::string wordTobeFound = QString::fromStdString(keyWord).section(" ", 0, 0).toStdString();
int size = wordTobeFound.length();
term.skip_to(wordTobeFound);
int count =0;
for(auto pos = term.positionlist_begin();pos != term.positionlist_end()&&count < 6;++pos)
{
std::string s = data.substr((*pos < 60)? 0: (*pos - 60) , size + 120);
int count = 0;
for(auto pos = term.positionlist_begin(); pos != term.positionlist_end() && count < 6; ++pos) {
std::string s = data.substr((*pos < 60) ? 0 : (*pos - 60), size + 120);
QString snippet = QString::fromStdString(s);
if(snippet.size() > 6 + QString::fromStdString(keyWord).size())
snippet.replace(0,3,"...").replace(snippet.size()-3,3,"...");
else
if(snippet.size() > 6 + QString::fromStdString(keyWord).size()) {
snippet.replace(0, 3, "...").replace(snippet.size() - 3, 3, "...");
} else {
snippet.append("...").prepend("...");
}
snippets.append(snippet);
QString().swap(snippet);
std::string().swap(s);
@ -423,23 +386,84 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord)
// }
SearchManager::m_mutex3.lock();
if(m_uniqueSymbol == SearchManager::uniqueSymbol3)
{
m_search_result->enqueue(qMakePair(path,snippets));
if(m_uniqueSymbol == SearchManager::uniqueSymbol3) {
m_search_result->enqueue(qMakePair(path, snippets));
SearchManager::m_mutex3.unlock();
snippets.clear();
QStringList().swap(snippets);
}
else
{
} else {
SearchManager::m_mutex3.unlock();
return -1;
}
// searchResult.insert(path,snippets);
qDebug()<< "path="<< path << ",weight=" <<docScoreWeight << ",percent=" << docScorePercent;
qDebug() << "path=" << path << ",weight=" << docScoreWeight << ",percent=" << docScorePercent;
}
// // if(!pathTobeDelete->isEmpty())
// // deleteAllIndex(pathTobeDelete)
return 0;
}
// Builds a direct (index-free) search task.
//
// keyword          - text that entry file names are matched against in run().
// searchResultFile - queue that run() fills with matching non-directory paths.
// searchResultDir  - queue that run() fills with matching directory paths.
// uniqueSymbol     - value compared against SearchManager::uniqueSymbol1 in
//                    run() to detect that this task has been superseded.
//
// Only the queue pointers are stored; the queues themselves are not copied.
DirectSearch::DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol)
    : m_keyword(keyword),
      m_searchResultFile(searchResultFile),
      m_searchResultDir(searchResultDir),
      m_uniqueSymbol(uniqueSymbol) {
    // Enable QRunnable auto-deletion for this task.
    setAutoDelete(true);
}
void DirectSearch::run() {
QQueue<QString> bfs;
bfs.enqueue(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
QFileInfoList list;
QDir dir;
// QDir::Hidden
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for (auto i : list) {
if (i.isDir() && (!(i.isSymLink()))) {
bool findIndex = false;
for (QString j : blockList) {
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
findIndex = true;
break;
}
}
if (findIndex == true) {
qDebug() << "path is blocked:" << i.absoluteFilePath();
continue;
}
bfs.enqueue(i.absoluteFilePath());
}
if(i.fileName().contains(m_keyword, Qt::CaseInsensitive)) {
SearchManager::m_mutex1.lock();
// qWarning() << i.fileName() << m_keyword;
if(m_uniqueSymbol == SearchManager::uniqueSymbol1) {
// TODO
if(i.isDir() && m_searchResultDir->length() < 51) {
m_searchResultDir->enqueue(i.absoluteFilePath());
} else if(m_searchResultFile->length() < 51) {
m_searchResultFile->enqueue(i.absoluteFilePath());
}
SearchManager::m_mutex1.unlock();
if(m_searchResultDir->length() > 49 && m_searchResultFile->length() > 49) {
return;
}
} else {
// TODO
// More suitable method?
m_searchResultFile->clear();
m_searchResultDir->clear();
SearchManager::m_mutex1.unlock();
return;
}
}
}
}
}

View File

@ -31,14 +31,23 @@
#include <thread>
#include <QRunnable>
#include <QThreadPool>
#include <QFileInfo>
#include <QDebug>
#include <QtConcurrent/QtConcurrent>
#include <QThread>
#include <QUrl>
#include "file-utils.h"
#include "global-settings.h"
#include "chinese-segmentation.h"
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
namespace Zeeker {
class SearchManager : public QObject
{
class LIBSEARCH_EXPORT SearchManager : public QObject {
friend class FileSearch;
friend class FileContentSearch;
Q_OBJECT
@ -56,12 +65,12 @@ public:
static QMutex m_mutex3;
public Q_SLOTS:
void onKeywordSearch(QString keyword,QQueue<QString> *searchResultFile,QQueue<QString> *searchResultDir,QQueue<QPair<QString,QStringList>> *searchResultContent);
void onKeywordSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, QQueue<QPair<QString, QStringList>> *searchResultContent);
Q_SIGNALS:
void resultFile(QQueue<QString> *);
void resultDir(QQueue<QString> *);
void resultContent(QQueue<QPair<QString,QStringList>> *);
void resultContent(QQueue<QPair<QString, QStringList>> *);
private:
// int keywordSearchfile(size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
// int keywordSearchContent(size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
@ -87,10 +96,9 @@ private:
QThreadPool m_pool;
};
class FileSearch : public QRunnable
{
class FileSearch : public QRunnable {
public:
explicit FileSearch(QQueue<QString> *searchResult,size_t uniqueSymbol, QString keyword, QString value,unsigned slot = 1,int begin = 0, int num = 20);
explicit FileSearch(QQueue<QString> *searchResult, size_t uniqueSymbol, QString keyword, QString value, unsigned slot = 1, int begin = 0, int num = 20);
~FileSearch();
protected:
void run();
@ -107,21 +115,35 @@ private:
int m_begin = 0;
int m_num = 20;
};
class FileContentSearch : public QRunnable
{
class FileContentSearch : public QRunnable {
public:
explicit FileContentSearch(QQueue<QPair<QString,QStringList>> *searchResult,size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
explicit FileContentSearch(QQueue<QPair<QString, QStringList>> *searchResult, size_t uniqueSymbol, QString keyword, int begin = 0, int num = 20);
~FileContentSearch();
protected:
void run();
private:
int keywordSearchContent();
int getResult(Xapian::MSet &result,std::string &keyWord);
int getResult(Xapian::MSet &result, std::string &keyWord);
QQueue<QPair<QString,QStringList>> *m_search_result = nullptr;
QQueue<QPair<QString, QStringList>> *m_search_result = nullptr;
size_t m_uniqueSymbol;
QString m_keyword;
int m_begin = 0;
int m_num = 20;
};
class DirectSearch : public QRunnable {
public:
explicit DirectSearch(QString keyword, QQueue<QString> *searchResultFile, QQueue<QString> *searchResultDir, size_t uniqueSymbol);
protected:
void run();
private:
QString m_keyword;
QQueue<QString>* m_searchResultFile = nullptr;
QQueue<QString>* m_searchResultDir = nullptr;
size_t m_uniqueSymbol;
};
}
#endif // SEARCHMANAGER_H

View File

@ -0,0 +1,25 @@
#include "searchmethodmanager.h"
using namespace Zeeker;
// Switches the global search method.
//
// INDEXSEARCH and DIRECTSEARCH are stored in FileUtils::searchMethod; any
// other value is reported and leaves the current setting untouched.
// When INDEXSEARCH is requested and FileUtils::_index_status is 0, the first
// index run is started, and the shared InotifyIndex instance is started too
// unless it is already running.
void SearchMethodManager::searchMethod(FileUtils::SearchMethod sm) {
    qWarning() << "searchMethod start: " << static_cast<int>(sm);

    const bool indexRequested = (FileUtils::SearchMethod::INDEXSEARCH == sm);
    const bool directRequested = (FileUtils::SearchMethod::DIRECTSEARCH == sm);

    if(!indexRequested && !directRequested) {
        printf("enum class error!!!\n");
        qWarning("enum class error!!!\n");
    } else {
        FileUtils::searchMethod = sm;
    }

    if(indexRequested && 0 == FileUtils::_index_status) {
        qWarning() << "start first index";
        m_fi.start();

        qWarning() << "start inotify index";
        m_ii = InotifyIndex::getInstance("/home");
        if(!m_ii->isRunning()) {
            m_ii->start();
        }
        qDebug() << "Search method has been set to INDEXSEARCH";
    }

    qWarning() << "searchMethod end: " << static_cast<int>(FileUtils::searchMethod);
}

View File

@ -0,0 +1,17 @@
#ifndef SEARCHMETHODMANAGER_H
#define SEARCHMETHODMANAGER_H
#include "first-index.h"
#include "inotify-index.h"
namespace Zeeker {
// Selects the search method (index-based or direct) and owns the indexing
// objects started when index search is selected.
class SearchMethodManager {
public:
    SearchMethodManager() = default;
    // Applies the requested search method; see searchmethodmanager.cpp.
    void searchMethod(FileUtils::SearchMethod sm);
private:
    // First (full) index run, started from searchMethod().
    FirstIndex m_fi;
    // Shared InotifyIndex instance, assigned in searchMethod(). Initialised to
    // nullptr so the defaulted constructor never leaves it indeterminate.
    InotifyIndex* m_ii = nullptr;
};
}
#endif // SEARCHMETHODMANAGER_H

View File

@ -18,14 +18,13 @@
*
*/
#include "traverse_bfs.h"
Traverse_BFS::Traverse_BFS(const QString& path)
{
using namespace Zeeker;
Traverse_BFS::Traverse_BFS(const QString& path) {
Q_ASSERT('/' == path.at(0));
this->path = path;
}
void Traverse_BFS::Traverse(){
void Traverse_BFS::Traverse() {
QQueue<QString> bfs;
bfs.enqueue(this->path);
QFileInfoList list;
@ -33,11 +32,11 @@ void Traverse_BFS::Traverse(){
// QDir::Hidden
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
while (!bfs.empty()) {
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
for (auto i : list){
if (i.isDir() && (!(i.isSymLink()))){
for(auto i : list) {
if(i.isDir() && (!(i.isSymLink()))) {
bfs.enqueue(i.absoluteFilePath());
}
DoSomething(i);
@ -45,6 +44,6 @@ void Traverse_BFS::Traverse(){
}
}
void Traverse_BFS::setPath(const QString& path){
void Traverse_BFS::setPath(const QString& path) {
this->path = path;
}

View File

@ -24,20 +24,21 @@
#include <QDebug>
#include <QDir>
#include <QQueue>
class Traverse_BFS
{
namespace Zeeker {
class Traverse_BFS {
public:
Traverse_BFS() = default;
void Traverse();
virtual ~Traverse_BFS() = default;
virtual void DoSomething(const QFileInfo&) = 0;
void setPath(const QString&);
protected:
explicit Traverse_BFS(const QString&);
Traverse_BFS(const QString&);
QString path = "/home";
private:
Traverse_BFS(const Traverse_BFS&) = delete;
void operator=(const Traverse_BFS&) = delete;
};
}
#endif // TRAVERSE_BFS_H

View File

@ -19,28 +19,26 @@
*/
#include "ukui-search-qdbus.h"
#include <QDebug>
UkuiSearchQDBus::UkuiSearchQDBus()
{
using namespace Zeeker;
UkuiSearchQDBus::UkuiSearchQDBus() {
this->tmpSystemQDBusInterface = new QDBusInterface("com.ukui.search.qt.systemdbus",
"/",
"com.ukui.search.interface",
QDBusConnection::systemBus());
if (!tmpSystemQDBusInterface->isValid()){
"/",
"com.ukui.search.interface",
QDBusConnection::systemBus());
if(!tmpSystemQDBusInterface->isValid()) {
qCritical() << "Create Client Interface Failed When execute chage: " << QDBusConnection::systemBus().lastError();
return;
}
}
UkuiSearchQDBus::~UkuiSearchQDBus(){
UkuiSearchQDBus::~UkuiSearchQDBus() {
delete this->tmpSystemQDBusInterface;
this->tmpSystemQDBusInterface = nullptr;
}
//一键三连
void UkuiSearchQDBus::setInotifyMaxUserWatches()
{
void UkuiSearchQDBus::setInotifyMaxUserWatches() {
// /proc/sys/fs/inotify/max_user_watches
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1");
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep1");
// sysctl
this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep2");
// /etc/sysctl.conf

View File

@ -21,9 +21,8 @@
#define UKUISEARCHQDBUS_H
#include <QDBusInterface>
class UkuiSearchQDBus
{
namespace Zeeker {
class UkuiSearchQDBus {
public:
UkuiSearchQDBus();
~UkuiSearchQDBus();
@ -31,5 +30,6 @@ public:
private:
QDBusInterface* tmpSystemQDBusInterface;
};
}
#endif // UKUISEARCHQDBUS_H

View File

@ -18,6 +18,6 @@
*
*/
#include "libsearch.h"
using namespace Zeeker;
// Encapsulate advanced interfaces here.

Some files were not shown because too many files have changed in this diff Show More