Merge pull request #274 from iaom/0619-new-frontend

Merge from dev-unity
This commit is contained in:
iaom 2021-06-30 16:28:01 +08:00 committed by GitHub
commit 37144107d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 421 additions and 108 deletions

View File

@ -33,6 +33,19 @@ struct DatElement {
} }
}; };
// One entry of the IDF dictionary: a word and its inverse-document-frequency value.
struct IdfElement {
    string word;     // dictionary key
    double idf = 0;  // IDF value associated with `word`

    // Strict ordering used when sorting entries: ascending by word; entries
    // sharing the same word are ordered by descending idf.
    bool operator < (const IdfElement & b) const {
        if (word != b.word) {
            return word < b.word;
        }
        return idf > b.idf;
    }
};
inline std::ostream & operator << (std::ostream& os, const DatElement & elem) { inline std::ostream & operator << (std::ostream& os, const DatElement & elem) {
return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight; return os << "word=" << elem.word << "/tag=" << elem.tag << "/weight=" << elem.weight;
} }
@ -91,13 +104,24 @@ public:
JiebaDAT::result_pair_type find_result; JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result); dat_.exactMatchSearch(key.c_str(), find_result);
if ((0 == find_result.length) || (find_result.value < 0) || (find_result.value >= elements_num_)) { if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return nullptr; return nullptr;
} }
return &elements_ptr_[ find_result.value ]; return &elements_ptr_[ find_result.value ];
} }
const double Find(const string & key, std::size_t length, std::size_t node_pos) const {
JiebaDAT::result_pair_type find_result;
dat_.exactMatchSearch(key.c_str(), find_result, length, node_pos);
if ((0 == find_result.length) || (find_result.value < 0) || ((size_t)find_result.value >= elements_num_)) {
return -1;
}
return idf_elements_ptr_[ find_result.value ];
}
void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, void Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end,
vector<struct DatDag>&res, size_t max_word_len) const { vector<struct DatDag>&res, size_t max_word_len) const {
@ -119,7 +143,7 @@ public:
for (std::size_t idx = 0; idx < num_results; ++idx) { for (std::size_t idx = 0; idx < num_results; ++idx) {
auto & match = result_pairs[idx]; auto & match = result_pairs[idx];
if ((match.value < 0) || (match.value >= elements_num_)) { if ((match.value < 0) || ((size_t)match.value >= elements_num_)) {
continue; continue;
} }
@ -156,6 +180,11 @@ public:
return InitAttachDat(dat_cache_file, md5); return InitAttachDat(dat_cache_file, md5);
} }
// IDF variant of InitBuildDat: writes the cache file for `elements` and then
// attaches to the freshly written file.
// Returns the result of InitIdfAttachDat() (true when the attach succeeded).
bool InitBuildDat(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    BuildDatCache(elements, dat_cache_file, md5);

    const bool attached = InitIdfAttachDat(dat_cache_file, md5);
    return attached;
}
bool InitAttachDat(const string & dat_cache_file, const string & md5) { bool InitAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY); mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
@ -187,6 +216,37 @@ public:
return true; return true;
} }
bool InitIdfAttachDat(const string & dat_cache_file, const string & md5) {
mmap_fd_ = ::open(dat_cache_file.c_str(), O_RDONLY);
if (mmap_fd_ < 0) {
return false;
}
const auto seek_off = ::lseek(mmap_fd_, 0, SEEK_END);
assert(seek_off >= 0);
mmap_length_ = seek_off;
mmap_addr_ = reinterpret_cast<char *>(mmap(NULL, mmap_length_, PROT_READ, MAP_SHARED, mmap_fd_, 0));
assert(MAP_FAILED != mmap_addr_);
assert(mmap_length_ >= sizeof(CacheFileHeader));
CacheFileHeader & header = *reinterpret_cast<CacheFileHeader*>(mmap_addr_);
elements_num_ = header.elements_num;
min_weight_ = header.min_weight;
assert(sizeof(header.md5_hex) == md5.size());
if (0 != memcmp(&header.md5_hex[0], md5.c_str(), md5.size())) {
return false;
}
assert(mmap_length_ == sizeof(header) + header.elements_num * sizeof(double) + header.dat_size * dat_.unit_size());
idf_elements_ptr_ = (const double *)(mmap_addr_ + sizeof(header));
const char * dat_ptr = mmap_addr_ + sizeof(header) + sizeof(double) * elements_num_;
dat_.set_array(dat_ptr, header.dat_size);
return true;
}
private: private:
void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) { void BuildDatCache(vector<DatElement>& elements, const string & dat_cache_file, const string & md5) {
std::sort(elements.begin(), elements.end()); std::sort(elements.begin(), elements.end());
@ -240,12 +300,62 @@ private:
} }
} }
// Build the double-array trie for the IDF dictionary and write the cache file.
//
// `elements` is sorted in place (IdfElement::operator<). The value stored in
// the trie for each key is the element's index after sorting, which is also
// its index in the table of doubles written to the file.
//
// File layout written: CacheFileHeader | one double per element | trie array.
// The data goes to a mkstemp() temporary next to `dat_cache_file` and is
// renamed over the target only after a complete write, so a partially written
// cache is never installed under the final name.
//
// @param elements        IDF entries; reordered by this call
// @param dat_cache_file  destination path of the cache file
// @param md5             digest stored in the header; must be exactly
//                        sizeof(header.md5_hex) bytes long
void BuildDatCache(vector<IdfElement>& elements, const string & dat_cache_file, const string & md5) {
    std::sort(elements.begin(), elements.end());

    vector<const char*> keys_ptr_vec;
    vector<int> values_vec;
    vector<double> mem_elem_vec;

    keys_ptr_vec.reserve(elements.size());
    values_vec.reserve(elements.size());
    mem_elem_vec.reserve(elements.size());

    CacheFileHeader header;
    header.min_weight = min_weight_;
    assert(sizeof(header.md5_hex) == md5.size());
    memcpy(&header.md5_hex[0], md5.c_str(), md5.size());

    for (size_t i = 0; i < elements.size(); ++i) {
        keys_ptr_vec.push_back(elements[i].word.data());
        values_vec.push_back(static_cast<int>(i));
        mem_elem_vec.push_back(elements[i].idf);
    }

    // data() instead of &v[0]: well-defined even when `elements` is empty.
    auto const ret = dat_.build(keys_ptr_vec.size(), keys_ptr_vec.data(), NULL, values_vec.data());
    assert(0 == ret);
    (void)ret;
    header.elements_num = mem_elem_vec.size();
    header.dat_size = dat_.size();

    {
        string tmp_filepath = string(dat_cache_file) + "_XXXXXX";

        // umask() is process-wide: remember the previous mask and restore it
        // as soon as the temporary file exists.
        const mode_t old_mask = ::umask(S_IWGRP | S_IWOTH);
        // mkstemp() rewrites the XXXXXX suffix in place, so it needs the
        // string's own mutable buffer.
        const int fd = ::mkstemp(&tmp_filepath[0]);
        const int mkstemp_errno = errno;
        ::umask(old_mask);

        if (fd < 0) {
            // Only report an error when there actually is one.
            qDebug() << "mkstemp error:" << mkstemp_errno << tmp_filepath.data();
            assert(fd >= 0);
            return;
        }
        ::fchmod(fd, 0644);

        const size_t expected_bytes = sizeof(header) + mem_elem_vec.size() * sizeof(double) + dat_.total_size();
        auto write_bytes = ::write(fd, &header, sizeof(header));
        write_bytes += ::write(fd, mem_elem_vec.data(), sizeof(double) * mem_elem_vec.size());
        write_bytes += ::write(fd, dat_.array(), dat_.total_size());
        ::close(fd);

        const bool complete = (write_bytes >= 0) && (static_cast<size_t>(write_bytes) == expected_bytes);
        assert(complete);
        if (!complete) {
            // Never install a truncated cache; drop the temporary instead.
            ::unlink(tmp_filepath.c_str());
            return;
        }

        const auto rename_ret = ::rename(tmp_filepath.c_str(), dat_cache_file.c_str());
        assert(0 == rename_ret);
        if (0 != rename_ret) {
            ::unlink(tmp_filepath.c_str());
        }
    }
}
DatTrie(const DatTrie &); DatTrie(const DatTrie &);
DatTrie &operator=(const DatTrie &); DatTrie &operator=(const DatTrie &);
private: private:
JiebaDAT dat_; JiebaDAT dat_;
const DatMemElem * elements_ptr_ = nullptr; const DatMemElem * elements_ptr_ = nullptr;
const double * idf_elements_ptr_= nullptr;
size_t elements_num_ = 0; size_t elements_num_ = 0;
double min_weight_ = 0; double min_weight_ = 0;

View File

@ -130,7 +130,7 @@ private:
dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache"; dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
} }
QString path = QString::fromStdString(dat_cache_path); QString path = QString::fromStdString(dat_cache_path);
qDebug() << "#########path:" << path; qDebug() << "#########Dict path:" << path;
if (dat_.InitAttachDat(dat_cache_path, md5)) { if (dat_.InitAttachDat(dat_cache_path, md5)) {
LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_; LoadUserDict(user_dict_paths, false); // for load user_dict_single_chinese_word_;
total_dict_size_ = file_size_sum; total_dict_size_ = file_size_sum;

View File

@ -138,10 +138,10 @@ private:
size_t now, old, stat; size_t now, old, stat;
double tmp, endE, endS; double tmp, endE, endS;
//vector<int> path(XYSize); vector<int> path(XYSize);
//vector<double> weight(XYSize); vector<double> weight(XYSize);
int path[XYSize]; //int path[XYSize];
double weight[XYSize]; //double weight[XYSize];
//start //start
for (size_t y = 0; y < Y; y++) { for (size_t y = 0; y < Y; y++) {

View File

@ -0,0 +1,134 @@
#pragma once
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <cstring>
#include <cstdlib>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp"
#include "Unicode.hpp"
#include "DatTrie.hpp"
#include <QDebug>
namespace cppjieba {
using namespace limonp;
const size_t IDF_COLUMN_NUM = 2;
// IDF dictionary backed by a DatTrie cache file.
//
// On construction the dictionary file's MD5 is computed and a matching cache
// file is attached if one exists; otherwise the dictionary is parsed, the
// cache is built, and the parsed entries are released again. Either way
// `idfAverage_` ends up holding the mean IDF of the dictionary file.
class IdfTrie {
public:
    enum UserWordWeightOption {
        WordWeightMin,
        WordWeightMedian,
        WordWeightMax,
    }; // enum UserWordWeightOption

    // @param dict_path             path of the IDF dictionary ("word idf" per line)
    // @param dat_cache_path        cache file path; when empty a path under
    //                              /tmp derived from the MD5 is used
    // @param user_word_weight_opt  only used as part of the default cache file name
    IdfTrie(const string& dict_path, const string & dat_cache_path = "",
            UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
        Init(dict_path, dat_cache_path, user_word_weight_opt);
    }

    ~IdfTrie() {}

    // Look up the IDF of `word`; forwards to DatTrie::Find and returns its
    // result unchanged.
    double Find(const string & word, std::size_t length = 0, std::size_t node_pos = 0) const {
        return dat_.Find(word, length, node_pos);
    }

    // Forwards to the range-based DatTrie::Find, filling `res`.
    void Find(RuneStrArray::const_iterator begin,
              RuneStrArray::const_iterator end,
              vector<struct DatDag>&res,
              size_t max_word_len = MAX_WORD_LENGTH) const {
        dat_.Find(begin, end, res, max_word_len);
    }

    // True when `word` is in user_dict_single_chinese_word_ (nothing in this
    // class inserts into that set).
    bool IsUserDictSingleChineseWord(const Rune& word) const {
        return IsIn(user_dict_single_chinese_word_, word);
    }

    double GetMinWeight() const {
        return dat_.GetMinWeight();
    }

    size_t GetTotalDictSize() const {
        return total_dict_size_;
    }

private:
    // Attach to an up-to-date cache, or parse the dictionary and build one.
    void Init(const string& dict_path, string dat_cache_path,
              UserWordWeightOption user_word_weight_opt) {
        size_t file_size_sum = 0;
        const string md5 = CalcFileListMD5(dict_path, file_size_sum);

        if (dat_cache_path.empty()) {
            // When no cache location is given, the dictionary data file is
            // stored under /tmp by default. --jxx20200519
            dat_cache_path = /*dict_path*/"/tmp/" + md5 + "." + to_string(user_word_weight_opt) + ".dat_cache";
        }
        QString path = QString::fromStdString(dat_cache_path);
        qDebug() << "#########Idf path:" << path;

        if (dat_.InitIdfAttachDat(dat_cache_path, md5)) {
            // The cache stores per-word values only, so the average still has
            // to be derived from the dictionary file. Stream it instead of
            // materialising every entry; previously idfAverage_ was left
            // unset on this path.
            idfAverage_ = CalcIdfAverageFromFile(dict_path);
            total_dict_size_ = file_size_sum;
            return;
        }

        LoadDefaultIdf(dict_path);
        double idf_sum_ = CalcIdfSum(static_node_infos_);
        assert(static_node_infos_.size());
        idfAverage_ = idf_sum_ / static_node_infos_.size();
        assert(idfAverage_ > 0.0);
        double min_weight = 0;
        dat_.SetMinWeight(min_weight);

        const auto build_ret = dat_.InitBuildDat(static_node_infos_, dat_cache_path, md5);
        assert(build_ret);
        (void)build_ret;
        total_dict_size_ = file_size_sum;
        // Release the parsed entries; lookups go through the cache from here on.
        vector<IdfElement>().swap(static_node_infos_);
    }

    // Parse `filePath` ("word idf" per line, space separated) into
    // static_node_infos_. Empty lines are logged and skipped; a line that does
    // not split into exactly IDF_COLUMN_NUM fields trips XCHECK. If the file
    // cannot be opened nothing is loaded.
    void LoadDefaultIdf(const string& filePath) {
        ifstream ifs(filePath.c_str());
        if(not ifs.is_open()){
            return ;
        }
        string line;
        vector<string> buf;
        size_t lineno = 0;

        for (; getline(ifs, line); lineno++) {
            if (line.empty()) {
                XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
                continue;
            }
            Split(line, buf, " ");
            XCHECK(buf.size() == IDF_COLUMN_NUM) << "split result illegal, line:" << line;
            IdfElement node_info;
            node_info.word = buf[0];
            node_info.idf = atof(buf[1].c_str());
            static_node_infos_.push_back(node_info);
        }
    }

    // Sum of the idf fields of `node_infos`.
    double CalcIdfSum(const vector<IdfElement>& node_infos) const {
        double sum = 0.0;
        for (size_t i = 0; i < node_infos.size(); i++) {
            sum += node_infos[i].idf;
        }
        return sum;
    }

    // Mean IDF of the entries in `filePath`, computed in one streaming pass
    // without keeping the entries. Visits lines in the same order and with the
    // same empty-line rule as LoadDefaultIdf; lines without exactly
    // IDF_COLUMN_NUM fields are ignored. Returns 0.0 when the file cannot be
    // opened or holds no usable entry.
    double CalcIdfAverageFromFile(const string& filePath) const {
        ifstream ifs(filePath.c_str());
        if (not ifs.is_open()) {
            return 0.0;
        }
        string line;
        vector<string> buf;
        double sum = 0.0;
        size_t count = 0;

        while (getline(ifs, line)) {
            if (line.empty()) {
                continue;
            }
            buf.clear();
            Split(line, buf, " ");
            if (buf.size() != IDF_COLUMN_NUM) {
                continue;
            }
            sum += atof(buf[1].c_str());
            ++count;
        }
        return count ? sum / count : 0.0;
    }

public:
    // Mean IDF over the dictionary; callers use it as the weight for words
    // that are not in the dictionary.
    double idfAverage_ = 0.0;
private:
    vector<IdfElement> static_node_infos_;   // parsed entries; only populated while building the cache
    size_t total_dict_size_ = 0;
    DatTrie dat_;
    unordered_set<Rune> user_dict_single_chinese_word_;
};
}

View File

@ -21,7 +21,7 @@ public:
mix_seg_(&dict_trie_, &model_, stopWordPath), mix_seg_(&dict_trie_, &model_, stopWordPath),
full_seg_(&dict_trie_), full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_, stopWordPath), query_seg_(&dict_trie_, &model_, stopWordPath),
extractor(&dict_trie_, &model_, idfPath, stopWordPath){ } extractor(&dict_trie_, &model_, idfPath, dat_cache_path,stopWordPath){ }
~Jieba() { } ~Jieba() { }
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const { void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {

View File

@ -2,6 +2,7 @@
#include <cmath> #include <cmath>
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include "IdfTrie.hpp"
namespace cppjieba { namespace cppjieba {
@ -11,18 +12,14 @@ using namespace std;
/*utf8*/ /*utf8*/
class KeywordExtractor { class KeywordExtractor {
public: public:
// struct Word {
// string word;
// vector<size_t> offsets;
// double weight;
// }; // struct Word
KeywordExtractor(const DictTrie* dictTrie, KeywordExtractor(const DictTrie* dictTrie,
const HMMModel* model, const HMMModel* model,
const string& idfPath, const string& idfPath,
const string& dat_cache_path,
const string& stopWordPath) const string& stopWordPath)
: segment_(dictTrie, model, stopWordPath) { : segment_(dictTrie, model, stopWordPath),
LoadIdfDict(idfPath); idf_trie_(idfPath,dat_cache_path){
} }
~KeywordExtractor() { ~KeywordExtractor() {
} }
@ -63,12 +60,11 @@ public:
keywords.reserve(wordmap.size()); keywords.reserve(wordmap.size());
for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) { for (unordered_map<string, KeyWord>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);//IDF词典查找 double idf = idf_trie_.Find(itr->first);
if (-1 != idf) {//IDF词典查找
if (cit != idfMap_.end()) { itr->second.weight *= idf;
itr->second.weight *= cit->second;
} else { } else {
itr->second.weight *= idfAverage_; itr->second.weight *= idf_trie_.idfAverage_;
} }
itr->second.word = itr->first; itr->second.word = itr->first;
@ -80,51 +76,13 @@ public:
keywords.resize(topN); keywords.resize(topN);
} }
private: private:
void LoadIdfDict(const string& idfPath) {
ifstream ifs(idfPath.c_str());
if(not ifs.is_open()){
return ;
}
XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
string line ;
vector<string> buf;
double idf = 0.0;
double idfSum = 0.0;
size_t lineno = 0;
for (; getline(ifs, line); lineno++) {
buf.clear();
if (line.empty()) {
XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
continue;
}
Split(line, buf, " ");
if (buf.size() != 2) {
XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
continue;
}
idf = atof(buf[1].c_str());
idfMap_[buf[0]] = idf;
idfSum += idf;
}
assert(lineno);
idfAverage_ = idfSum / lineno;
assert(idfAverage_ > 0.0);
}
static bool Compare(const KeyWord& lhs, const KeyWord& rhs) { static bool Compare(const KeyWord& lhs, const KeyWord& rhs) {
return lhs.weight > rhs.weight; return lhs.weight > rhs.weight;
} }
MixSegment segment_; MixSegment segment_;
unordered_map<string, double> idfMap_; IdfTrie idf_trie_;
double idfAverage_;
unordered_set<Rune> symbols_; unordered_set<Rune> symbols_;
}; // class KeywordExtractor }; // class KeywordExtractor

View File

@ -156,8 +156,9 @@ public:
// if mp Get a single one and it is not in userdict, collect it in sequence // if mp Get a single one and it is not in userdict, collect it in sequence
size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里i字符不是最后一个字符直接判定j字符 size_t j = i + 1; //当前i字符为单独的字符并且不在用户字典里i字符不是最后一个字符直接判定j字符
while (j < (words.size() - 1) && words[j].left == words[j].right && while (j < (words.size() - 1)
!mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) { && words[j].left == words[j].right
&& !mpSeg_.IsUserDictSingleChineseWord(words[j].left->rune)) {
j++; j++;
} }

View File

@ -71,7 +71,7 @@ public:
cursor_ ++; cursor_ ++;
} }
} }
int num = 0;
while (cursor_ != sentence_.end()) { while (cursor_ != sentence_.end()) {
if (cursor_->rune == 0x20) { if (cursor_->rune == 0x20) {
if (wordRange.left == cursor_) { if (wordRange.left == cursor_) {
@ -83,6 +83,11 @@ public:
} }
cursor_ ++; cursor_ ++;
num++;
if (num >= 1024) { //todo 防止一次性传入过多字节暂定限制为1024个字
wordRange.right = cursor_;
return true;
}
} }
wordRange.right = sentence_.end(); wordRange.right = sentence_.end();

View File

@ -97,24 +97,6 @@ inline RuneArray DecodeRunesInString(const string& s) {
//重写DecodeRunesInString函数将实现放入函数中降低内存占用加快处理流程--jxx20210518 //重写DecodeRunesInString函数将实现放入函数中降低内存占用加快处理流程--jxx20210518
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) { inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
/*
RuneArray arr;
if (not DecodeRunesInString(s, arr)) {
return false;
}
runes.clear();
uint32_t offset = 0;
for (uint32_t i = 0; i < arr.size(); ++i) {
const uint32_t len = limonp::UnicodeToUtf8Bytes(arr[i]);
RuneInfo x(arr[i], offset, len, i, 1);
runes.push_back(x);
offset += len;
}
*/
uint32_t tmp; uint32_t tmp;
uint32_t offset = 0; uint32_t offset = 0;

View File

@ -2,6 +2,7 @@ INCLUDEPATH += $$PWD
HEADERS += \ HEADERS += \
$$PWD/DictTrie.hpp \ $$PWD/DictTrie.hpp \
$$PWD/IdfTrie.hpp \
$$PWD/FullSegment.hpp \ $$PWD/FullSegment.hpp \
$$PWD/HMMModel.hpp \ $$PWD/HMMModel.hpp \
$$PWD/HMMSegment.hpp \ $$PWD/HMMSegment.hpp \
@ -17,5 +18,4 @@ HEADERS += \
$$PWD/TextRankExtractor.hpp \ $$PWD/TextRankExtractor.hpp \
$$PWD/Trie.hpp \ $$PWD/Trie.hpp \
$$PWD/Unicode.hpp $$PWD/Unicode.hpp
include(limonp/limonp.pri) include(limonp/limonp.pri)

View File

@ -19,6 +19,8 @@ DEFINES += QT_DEPRECATED_WARNINGS
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
include(cppjieba/cppjieba.pri) include(cppjieba/cppjieba.pri)
#LIBS += -L/usr/local/lib/libjemalloc -ljemalloc
SOURCES += \ SOURCES += \
chinese-segmentation.cpp \ chinese-segmentation.cpp \

View File

@ -228,10 +228,11 @@ void AppMatch::getDesktopFilePath() {
} }
void AppMatch::getAppName(QMap<NameString, QStringList> &installed) { void AppMatch::getAppName(QMap<NameString, QStringList> &installed) {
QMap<NameString, QStringList>::const_iterator i; // QMap<NameString, QStringList>::const_iterator i;
for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) { // for(i = m_installAppMap.constBegin(); i != m_installAppMap.constEnd(); ++i) {
appNameMatch(i.key().app_name, installed); // appNameMatch(i.key().app_name, installed);
} // }
appNameMatch(installed);
qDebug() << "installed app match is successful!"; qDebug() << "installed app match is successful!";
} }
@ -276,6 +277,38 @@ void AppMatch::appNameMatch(QString appname, QMap<NameString, QStringList> &inst
} }
} }
} }
// Collect every installed application whose name matches m_sourceText into
// `installed`, walking m_installAppMap once.
//
// An application matches when, case-insensitively, m_sourceText is contained in
//   1. its display name, or
//   2. the first-letter abbreviation of one of its pinyin readings, or
//   3. the full pinyin of one of its readings (only tried when m_sourceText
//      has at least 2 characters).
// FileUtils::findMultiToneWords() is consumed as pairs: even index = pinyin,
// odd index = first letters.
void AppMatch::appNameMatch(QMap<NameString, QStringList> &installed) {
    // Iterate over a copy of the map, so inserting into `installed` cannot
    // affect the traversal.
    const QMap<NameString, QStringList> apps = m_installAppMap;
    NameString name;

    for(auto it = apps.constBegin(); it != apps.constEnd(); ++it) {
        const QString &appName = it.key().app_name;
        name.app_name = appName;

        bool matched = appName.contains(m_sourceText, Qt::CaseInsensitive);
        if(!matched) {
            const QStringList readings = FileUtils::findMultiToneWords(appName);
            const int pairCount = readings.size() / 2;
            for(int k = 0; k < pairCount; ++k) {
                // Chinese name -> first letters
                if(readings.at(2 * k + 1).contains(m_sourceText, Qt::CaseInsensitive)) {
                    matched = true;
                    break;
                }
                // A single-character query is never compared against full pinyin.
                if(m_sourceText.size() < 2) {
                    break;
                }
                // Chinese name -> full pinyin
                if(readings.at(2 * k).contains(m_sourceText, Qt::CaseInsensitive)) {
                    matched = true;
                    break;
                }
            }
        }

        if(matched) {
            installed.insert(name, it.value());
        }
    }
}
void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) { void AppMatch::softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn) {
// if(m_interFace->timeout() != -1) { // if(m_interFace->timeout() != -1) {

View File

@ -65,6 +65,7 @@ private:
void getAppName(QMap<NameString, QStringList> &installed); void getAppName(QMap<NameString, QStringList> &installed);
// void appNameMatch(QString appname,QString desktoppath,QString appicon); // void appNameMatch(QString appname,QString desktoppath,QString appicon);
void appNameMatch(QString appname, QMap<NameString, QStringList> &installed); void appNameMatch(QString appname, QMap<NameString, QStringList> &installed);
void appNameMatch(QMap<NameString, QStringList> &installed);
void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn); void softWareCenterSearch(QMap<NameString, QStringList> &softwarereturn);

View File

@ -483,8 +483,10 @@ void FileUtils::getDocxTextContent(QString &path, QString &textcontent) {
if(!file.open(QuaZip::mdUnzip)) if(!file.open(QuaZip::mdUnzip))
return; return;
if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) if(!file.setCurrentFile("word/document.xml", QuaZip::csSensitive)) {
file.close();
return; return;
}
QuaZipFile fileR(&file); QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); //读取方式打开 fileR.open(QIODevice::ReadOnly); //读取方式打开
@ -545,8 +547,10 @@ void FileUtils::getPptxTextContent(QString &path, QString &textcontent) {
if(i.startsWith(prefix)) if(i.startsWith(prefix))
fileList << i; fileList << i;
} }
if(fileList.isEmpty()) if(fileList.isEmpty()) {
file.close();
return; return;
}
for(int i = 0; i < fileList.size(); ++i){ for(int i = 0; i < fileList.size(); ++i){
QString name = prefix + QString::number(i + 1) + ".xml"; QString name = prefix + QString::number(i + 1) + ".xml";
@ -650,8 +654,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
if(!file.open(QuaZip::mdUnzip)) if(!file.open(QuaZip::mdUnzip))
return; return;
if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) if(!file.setCurrentFile("xl/sharedStrings.xml", QuaZip::csSensitive)) {
file.close();
return; return;
}
QuaZipFile fileR(&file); QuaZipFile fileR(&file);
fileR.open(QIODevice::ReadOnly); fileR.open(QIODevice::ReadOnly);
@ -706,8 +712,10 @@ void FileUtils::getXlsxTextContent(QString &path, QString &textcontent) {
void FileUtils::getPdfTextContent(QString &path, QString &textcontent) { void FileUtils::getPdfTextContent(QString &path, QString &textcontent) {
Poppler::Document *doc = Poppler::Document::load(path); Poppler::Document *doc = Poppler::Document::load(path);
if(doc->isLocked()) if(doc->isLocked()) {
delete doc;
return; return;
}
const QRectF qf; const QRectF qf;
int pageNum = doc->numPages(); int pageNum = doc->numPages();
for(int i = 0; i < pageNum; ++i) { for(int i = 0; i < pageNum; ++i) {

View File

@ -108,12 +108,14 @@ void ConstructDocumentForContent::run() {
FileReader::getTextContent(m_path, content); FileReader::getTextContent(m_path, content);
if(content.isEmpty()) if(content.isEmpty())
return; return;
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path)); //QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep))); //QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
Document doc; Document doc;
doc.setData(content); doc.setData(content);
doc.setUniqueTerm(uniqueterm); //doc.setUniqueTerm(uniqueterm);
doc.addTerm(upTerm); doc.setUniqueTerm(FileUtils::makeDocUterm(m_path));
//doc.addTerm(upTerm);
doc.addTerm(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
doc.addValue(m_path); doc.addValue(m_path);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. //'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
@ -131,6 +133,7 @@ void ConstructDocumentForContent::run() {
IndexGenerator::_mutex_doc_list_content.unlock(); IndexGenerator::_mutex_doc_list_content.unlock();
content.clear(); content.clear();
content.squeeze(); content.squeeze();
term.clear(); term.clear();
term.shrink_to_fit(); term.shrink_to_fit();
return; return;

View File

@ -37,7 +37,7 @@ void Document::addPosting(std::string term, QVector<size_t> offset, int weight)
} }
} }
void Document::addPosting(std::string term, std::vector<size_t> offset, int weight) { void Document::addPosting(std::string &term, std::vector<size_t> &offset, int weight) {
if(term == "") if(term == "")
return; return;
if(term.length() > 240) if(term.length() > 240)
@ -63,6 +63,12 @@ void Document::addTerm(QString term) {
m_document.add_term(term.toStdString()); m_document.add_term(term.toStdString());
} }
// Add `term` to the underlying Xapian document; an empty term is ignored.
void Document::addTerm(std::string term) {
    if(!term.empty()) {
        m_document.add_term(term);
    }
}
void Document::addValue(QString value) { void Document::addValue(QString value) {
m_document.add_value(1, value.toStdString()); m_document.add_value(1, value.toStdString());
} }
@ -73,12 +79,20 @@ void Document::setUniqueTerm(QString term) {
m_document.add_term(term.toStdString()); m_document.add_term(term.toStdString());
// m_unique_term = new QString(term); // m_unique_term = new QString(term);
m_unique_term = std::move(term); m_unique_term = std::move(term.toStdString());
} }
// Register `term` as this document's unique term: it is added to the Xapian
// document and remembered in m_unique_term. An empty term leaves both untouched.
void Document::setUniqueTerm(std::string term) {
    const bool hasTerm = !term.empty();
    if(hasTerm) {
        m_document.add_term(term);
        m_unique_term = term;
    }
}
std::string Document::getUniqueTerm() { std::string Document::getUniqueTerm() {
// qDebug()<<"m_unique_term!"<<*m_unique_term; // qDebug()<<"m_unique_term!"<<*m_unique_term;
// qDebug() << QString::fromStdString(m_unique_term.toStdString()); // qDebug() << QString::fromStdString(m_unique_term.toStdString());
return m_unique_term.toStdString(); return m_unique_term;//.toStdString();
} }
void Document::setIndexText(QStringList indexText) { void Document::setIndexText(QStringList indexText) {

View File

@ -41,11 +41,13 @@ public:
} }
void setData(QString &data); void setData(QString &data);
void addPosting(std::string term, QVector<size_t> offset, int weight = 1); void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
void addPosting(std::string term, std::vector<size_t> offset, int weight = 1); void addPosting(std::string &term, std::vector<size_t> &offset, int weight = 1);
void addPosting(std::string term, unsigned int offset, int weight = 1); void addPosting(std::string term, unsigned int offset, int weight = 1);
void addTerm(QString term); void addTerm(QString term);
void addTerm(std::string term);
void addValue(QString value); void addValue(QString value);
void setUniqueTerm(QString term); void setUniqueTerm(QString term);
void setUniqueTerm(std::string term);
std::string getUniqueTerm(); std::string getUniqueTerm();
void setIndexText(QStringList indexText); void setIndexText(QStringList indexText);
QStringList getIndexText(); QStringList getIndexText();
@ -53,7 +55,8 @@ public:
private: private:
Xapian::Document m_document; Xapian::Document m_document;
QStringList m_index_text; QStringList m_index_text;
QString m_unique_term; //QString m_unique_term;
std::string m_unique_term;
}; };
} }

View File

@ -154,7 +154,6 @@ void FirstIndex::run() {
++FileUtils::_index_status; ++FileUtils::_index_status;
pid_t pid; pid_t pid;
pid = fork(); pid = fork();
if(pid == 0) { if(pid == 0) {
@ -235,6 +234,7 @@ void FirstIndex::run() {
qDebug() << "content index end;"; qDebug() << "content index end;";
sem.release(2); sem.release(2);
}); });
mutex1.lock(); mutex1.lock();
mutex2.lock(); mutex2.lock();
mutex3.lock(); mutex3.lock();

View File

@ -29,7 +29,7 @@
#include "index-generator.h" #include "index-generator.h"
#include "chinese-segmentation.h" #include "chinese-segmentation.h"
#include <QStandardPaths> #include <QStandardPaths>
#include <malloc.h>
#define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString() #define INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/index_data").toStdString()
#define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString() #define CONTENT_INDEX_PATH (QStandardPaths::writableLocation(QStandardPaths::HomeLocation)+"/.config/org.ukui/ukui-search/content_index_data").toStdString()
@ -127,11 +127,11 @@ bool IndexGenerator::creatAllIndex(QQueue<QString> *messageList) {
// GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2"); // GlobalSettings::getInstance()->setValue(CONTENT_INDEX_DATABASE_STATE, "2");
// FileUtils::_index_status &= ~0x2; // FileUtils::_index_status &= ~0x2;
qDebug() << "finish creatAllIndex for content"; qDebug() << "finish creatAllIndex for content";
IndexGenerator::_doc_list_content.clear(); IndexGenerator::_doc_list_content.clear();
IndexGenerator::_doc_list_content.squeeze(); IndexGenerator::_doc_list_content.squeeze();
QVector<Document>().swap(IndexGenerator::_doc_list_content); QVector<Document>().swap(IndexGenerator::_doc_list_content);
// delete _doc_list_content; malloc_trim(0);
// _doc_list_content = nullptr;
} }
Q_EMIT this->transactionFinished(); Q_EMIT this->transactionFinished();
return true; return true;

View File

@ -1,6 +1,7 @@
#include "inotify-watch.h" #include "inotify-watch.h"
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <malloc.h> #include <malloc.h>
#include <errno.h>
using namespace Zeeker; using namespace Zeeker;
static InotifyWatch* global_instance_InotifyWatch = nullptr; static InotifyWatch* global_instance_InotifyWatch = nullptr;
@ -65,7 +66,7 @@ bool InotifyWatch::removeWatch(const QString &path, bool removeFromDatabase)
// qDebug() << i.value(); // qDebug() << i.value();
if(i.value().length() > path.length()) { if(i.value().length() > path.length()) {
if(i.value().startsWith(path)) { if(i.value().startsWith(path)) {
qDebug() << "remove path: " << i.value(); // qDebug() << "remove path: " << i.value();
inotify_rm_watch(m_inotifyFd, currentPath.key(path)); inotify_rm_watch(m_inotifyFd, currentPath.key(path));
currentPath.erase(i++); currentPath.erase(i++);
} else { } else {
@ -132,8 +133,18 @@ void InotifyWatch::run()
if (m_inotifyFd > 0) { if (m_inotifyFd > 0) {
qDebug()<<"Inotify init success!"; qDebug()<<"Inotify init success!";
} else { } else {
qWarning() << "Inotify init fail! Now try add inotify_user_instances.";
UkuiSearchQDBus usQDBus;
usQDBus.addInotifyUserInstances(128);
m_inotifyFd = inotify_init();
if (m_inotifyFd > 0) {
qDebug()<<"Inotify init success!";
} else {
printf("errno=%d\n",errno);
printf("Mesg:%s\n",strerror(errno));
Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify"); Q_ASSERT_X(0, "InotifyWatch", "Failed to initialize inotify");
} }
}
this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation)); this->addWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation)); this->setPath(QStandardPaths::writableLocation(QStandardPaths::HomeLocation));
@ -205,10 +216,12 @@ void InotifyWatch::run()
assert(false); assert(false);
} }
} }
qDebug() << "Leave watch loop";
if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) { if(FileUtils::SearchMethod::DIRECTSEARCH == FileUtils::searchMethod) {
IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3"); IndexStatusRecorder::getInstance()->setStatus(INOTIFY_NORMAL_EXIT, "3");
removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false); removeWatch(QStandardPaths::writableLocation(QStandardPaths::HomeLocation), false);
} }
close(m_inotifyFd);
// fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC); // fcntl(m_inotifyFd, F_SETFD, FD_CLOEXEC);
// m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read); // m_notifier = new QSocketNotifier(m_inotifyFd, QSocketNotifier::Read);
// connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection); // connect(m_notifier, &QSocketNotifier::activated, this, &InotifyWatch::slotEvent, Qt::DirectConnection);

View File

@ -44,3 +44,13 @@ void UkuiSearchQDBus::setInotifyMaxUserWatches() {
// /etc/sysctl.conf // /etc/sysctl.conf
// this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3"); // this->tmpSystemQDBusInterface->call("setInotifyMaxUserWatchesStep3");
} }
// Ask the system D-Bus service to raise the inotify user-instance limit by
// calling its "AddInotifyMaxUserInstance" method with `addNum`.
//
// @param addNum  argument passed to the remote method
// @return the integer carried by the reply (logged as the resulting
//         inotify_max_user_instances value), or -1 when the call did not
//         produce a valid reply.
//
// Every path now returns a value; previously control reached the end of this
// non-void function without a return statement.
int UkuiSearchQDBus::addInotifyUserInstances(int addNum)
{
    QDBusReply<int> reply = tmpSystemQDBusInterface->call("AddInotifyMaxUserInstance", addNum);
    if(!reply.isValid()) {
        qWarning() << "Call AddInotifyMaxUserInstance failed!";
        return -1;
    }

    const int newLimit = reply.value();
    qDebug() << "Set inotify_max_user_instances to" << newLimit;
    return newLimit;
}

View File

@ -21,12 +21,14 @@
#define UKUISEARCHQDBUS_H #define UKUISEARCHQDBUS_H
#include <QDBusInterface> #include <QDBusInterface>
#include <QDBusReply>
namespace Zeeker { namespace Zeeker {
// Wrapper around a QDBusInterface (tmpSystemQDBusInterface) through which
// inotify-limit requests are sent as D-Bus method calls.
class UkuiSearchQDBus { class UkuiSearchQDBus {
public: public:
UkuiSearchQDBus(); UkuiSearchQDBus();
~UkuiSearchQDBus(); ~UkuiSearchQDBus();
void setInotifyMaxUserWatches(); void setInotifyMaxUserWatches();
// Calls the remote method "AddInotifyMaxUserInstance", passing addNum as its argument.
int addInotifyUserInstances(int addNum);
private: private:
// Interface object used to issue the D-Bus calls above.
// NOTE(review): creation and ownership are not visible in this header - confirm in the constructor/destructor.
QDBusInterface* tmpSystemQDBusInterface; QDBusInterface* tmpSystemQDBusInterface;
}; };

View File

@ -34,7 +34,7 @@ include(settingsearch/settingsearch.pri))
LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation LIBS += -L$$OUT_PWD/../libchinese-segmentation/ -lchinese-segmentation
LIBS += -lxapian -lquazip5 -luchardet LIBS += -lxapian -lquazip5 -luchardet #-L/usr/local/lib/libjemalloc -ljemalloc
SOURCES += \ SOURCES += \
file-utils.cpp \ file-utils.cpp \

View File

@ -730,7 +730,7 @@ void ContentWidget::onListViewRowChanged(SearchListView * listview, const int &t
if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) { if(type == SearchItem::SearchType::Contents && !m_contentDetailList.isEmpty()) {
m_detailView->isContent = true; m_detailView->isContent = true;
m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword); m_detailView->setContent(m_contentDetailList.at(listview->currentIndex().row()), m_keyword);
} else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && listview->currentIndex().row() == listview->getLength() - 1) { } else if(type == SearchItem::SearchType::Best && !m_bestContent.isEmpty() && SearchItem::SearchType::Contents == m_bestList.at(listview->currentIndex().row()).first) {
m_detailView->setContent(m_bestContent, m_keyword); m_detailView->setContent(m_bestContent, m_keyword);
m_detailView->isContent = true; m_detailView->isContent = true;
m_detailView->setupWidget(SearchItem::SearchType::Contents, path); m_detailView->setupWidget(SearchItem::SearchType::Contents, path);

View File

@ -129,6 +129,8 @@ MainWindow::MainWindow(QWidget *parent) :
this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入 this->m_searchLayout->focusIn(); //打开主界面时输入框夺焦,可直接输入
this->raise(); this->raise();
this->activateWindow(); this->activateWindow();
} else if(this->isVisible()&&!this->isActiveWindow()) {
this->activateWindow();
} else { } else {
tryHideMainwindow(); tryHideMainwindow();
} }

View File

@ -9,7 +9,7 @@ TEMPLATE = app
PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0 PKGCONFIG += gio-2.0 glib-2.0 gio-unix-2.0
CONFIG += c++11 link_pkgconfig no_keywords lrelease CONFIG += c++11 link_pkgconfig no_keywords lrelease
LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11 LIBS += -lxapian -lgsettings-qt -lquazip5 -lX11
LIBS += -lukui-log4qt #LIBS += -lukui-log4qt -L/usr/local/lib/libjemalloc -ljemalloc
# The following define makes your compiler emit warnings if you use # The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings # any Qt feature that has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the # depend on your compiler). Please consult the documentation of the

View File

@ -23,3 +23,4 @@ CONFIG += ordered \
QT += widgets QT += widgets

View File

@ -102,6 +102,36 @@ QString SysdbusRegister::setInotifyMaxUserWatchesStep3() {
return QString(ba); return QString(ba);
} }
int SysdbusRegister::AddInotifyMaxUserInstance(int addNum)
{
QFile file("/proc/sys/fs/inotify/max_user_instances");
if(!file.open(QIODevice::ReadOnly | QIODevice::Text))
return -1;
QTextStream ts(&file);
QString s = ts.read(512);
int instances = s.toInt() + addNum;
QByteArray ba;
FILE * fp = NULL;
char cmd[128];
char buf[1024];
sprintf(cmd, "sysctl -w fs.inotify.max_user_instances=\"%d\"", instances);
if((fp = popen(cmd, "r")) != NULL) {
rewind(fp);
while(!feof(fp)) {
fgets(buf, sizeof(buf), fp);
ba.append(buf);
}
pclose(fp);
fp = NULL;
} else {
qWarning() << "popen open failed";
return -1;
}
return instances;
}
//The following example comes from control center //The following example comes from control center
//void SysdbusRegister::setAutoLoginStatus(QString username) { //void SysdbusRegister::setAutoLoginStatus(QString username) {

View File

@ -52,6 +52,7 @@ public slots:
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep1();
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep2();
Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3(); Q_SCRIPTABLE QString setInotifyMaxUserWatchesStep3();
Q_SCRIPTABLE int AddInotifyMaxUserInstance(int addNum);
// // 设置免密登录状态 // // 设置免密登录状态
// Q_SCRIPTABLE void setNoPwdLoginStatus(); // Q_SCRIPTABLE void setNoPwdLoginStatus();