forked from openkylin/ukui-search
Merge branch '0516ukss' into 'ukss-dev'
Encapsulate the basic interface of cppjieba. See merge request kylin-desktop/ukui-search!313
Commit 772458b8da
@ -0,0 +1,33 @@
+#ifndef CHINESESEGMENTATIONPRIVATE_H
+#define CHINESESEGMENTATIONPRIVATE_H
+
+#include "chinese-segmentation.h"
+#include "cppjieba/Jieba.hpp"
+#include "cppjieba/KeywordExtractor.hpp"
+
+class ChineseSegmentationPrivate
+{
+public:
+    explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
+    ~ChineseSegmentationPrivate();
+    vector<KeyWord> callSegment(const string& sentence);
+
+    vector<string> callMixSegmentCutStr(const string& sentence);
+    vector<Word> callMixSegmentCutWord(const string& sentence);
+    string lookUpTagOfWord(const string& word);
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    vector<Word> callFullSegment(const string& sentence);
+
+    vector<Word> callQuerySegment(const string& sentence);
+
+    vector<Word> callHMMSegment(const string& sentence);
+
+    vector<Word> callMPSegment(const string& sentence);
+
+private:
+    cppjieba::Jieba *m_jieba;
+    ChineseSegmentation *q = nullptr;
+};
+
+#endif // CHINESESEGMENTATIONPRIVATE_H
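The new private header above sets up a d-pointer (PIMPL) split: the public ChineseSegmentation class only keeps an opaque pointer to this class, so cppjieba types no longer leak into the installed API. Below is a minimal, self-contained sketch of the same wiring; the Engine/EnginePrivate names and the stub backend are illustrative placeholders, not part of this patch.

    #include <iostream>
    #include <string>

    // Public side: only a forward declaration of the private class is visible.
    class EnginePrivate;
    class Engine {
    public:
        Engine();
        std::string segment(const std::string &sentence);
    private:
        EnginePrivate *d = nullptr;   // opaque implementation pointer ("d-pointer")
    };

    // Private side: free to use any backend types without exposing them to users.
    class EnginePrivate {
    public:
        std::string segment(const std::string &sentence) { return sentence; } // stub backend
    };

    Engine::Engine() : d(new EnginePrivate) {}
    std::string Engine::segment(const std::string &sentence) { return d->segment(sentence); }

    int main() {
        Engine e;
        std::cout << e.segment("hello") << std::endl;
        return 0;
    }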
@ -19,12 +19,10 @@
  *
  */
 #include "chinese-segmentation.h"
-#include <QFileInfo>
-#include <QDebug>
-static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
-QMutex ChineseSegmentation::m_mutex;
+#include "chinese-segmentation-private.h"
 
-ChineseSegmentation::ChineseSegmentation() {
+ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
+{
     const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
     const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
     const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
@ -38,53 +36,127 @@ ChineseSegmentation::ChineseSegmentation() {
                              "");
 }
 
-ChineseSegmentation::~ChineseSegmentation() {
+ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
     if(m_jieba)
         delete m_jieba;
     m_jieba = nullptr;
 }
 
-ChineseSegmentation *ChineseSegmentation::getInstance() {
-    QMutexLocker locker(&m_mutex);
-    if(!global_instance_chinese_segmentation) {
-        global_instance_chinese_segmentation = new ChineseSegmentation;
-    }
-    return global_instance_chinese_segmentation;
-}
-
-QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
-//    std::string s;
-//    s = str.toStdString();
-//    str.squeeze();
-
+vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
     const size_t topk = -1;
-    std::vector<cppjieba::KeyWord> keywordres;
-    ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
-    std::string().swap(s);
-    QVector<SKeyWord> vecNeeds;
-    convert(keywordres, vecNeeds);
-
-    keywordres.clear();
-//    keywordres.shrink_to_fit();
-    return vecNeeds;
-
+    vector<KeyWord> keywordres;
+    ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence, keywordres, topk);
+    return keywordres;
 }
 
-std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
-    const size_t topk = -1;
-    std::vector<cppjieba::KeyWord> keywordres;
-    ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
-
+vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
+{
+    vector<string> keywordres;
+    ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
     return keywordres;
 }
 
-void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
-    for(auto i : keywordres) {
-        SKeyWord temp;
-        temp.word = i.word;
-        temp.offsets = QVector<size_t>::fromStdVector(i.offsets);
-        temp.weight = i.weight;
-        kw.append(temp);
-    }
+vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
+{
+    vector<Word> keywordres;
+    ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
+    return keywordres;
+}
+string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
+{
+    return ChineseSegmentationPrivate::m_jieba->LookupTag(word);
+}
+
+vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
+{
+    vector<pair<string, string>> words;
+    ChineseSegmentationPrivate::m_jieba->Tag(sentence, words);
+    return words;
+}
+
+vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
+{
+    vector<Word> keywordres;
+    ChineseSegmentationPrivate::m_jieba->CutAll(sentence, keywordres);
+    return keywordres;
+}
+
+vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
+{
+    vector<Word> keywordres;
+    ChineseSegmentationPrivate::m_jieba->CutForSearch(sentence, keywordres);
+    return keywordres;
+}
+
+vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
+{
+    vector<Word> keywordres;
+    ChineseSegmentationPrivate::m_jieba->CutHMM(sentence, keywordres);
+    return keywordres;
+}
+
+vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
+{
+    size_t maxWordLen = 512;
+    vector<Word> keywordres;
+    ChineseSegmentationPrivate::m_jieba->CutSmall(sentence, keywordres, maxWordLen);
+    return keywordres;
+}
+
+ChineseSegmentation *ChineseSegmentation::getInstance()
+{
+    static ChineseSegmentation *global_instance_chinese_segmentation = new ChineseSegmentation;
+    return global_instance_chinese_segmentation;
+}
+
+vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
+{
+    return d->callSegment(sentence);
+}
+
+vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
+{
+    return d->callMixSegmentCutStr(sentence);
+}
+
+vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
+{
+    return d->callMixSegmentCutWord(str);
+}
+
+string ChineseSegmentation::lookUpTagOfWord(const string &word)
+{
+    return d->lookUpTagOfWord(word);
+}
+
+vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
+{
+    return d->getTagOfWordsInSentence(sentence);
+}
+
+vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
+{
+    return d->callFullSegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
+{
+    return d->callQuerySegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
+{
+    return d->callHMMSegment(sentence);
+}
+
+vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
+{
+    return d->callMPSegment(sentence);
+}
+
+ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
+{
 }
 
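One behavioural detail worth noting in the hunk above: the old getInstance() guarded lazy construction with a QMutex, while the new one relies on a function-local static, whose initialization C++11 already guarantees to run exactly once even under concurrent calls. A standalone sketch of that pattern follows; the Lazy name is illustrative and not part of the patch.

    #include <iostream>

    class Lazy {
    public:
        static Lazy *getInstance() {
            // C++11 "magic static": the initializer runs exactly once,
            // even if several threads enter getInstance() at the same time.
            static Lazy *instance = new Lazy;
            return instance;
        }
    private:
        Lazy() { std::cout << "constructed once" << std::endl; }
    };

    int main() {
        return Lazy::getInstance() == Lazy::getInstance() ? 0 : 1;
    }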
@ -22,42 +22,95 @@
 #define CHINESESEGMENTATION_H
 
 #include "libchinese-segmentation_global.h"
-#include "cppjieba/Jieba.hpp"
-//#include "Logging.hpp"
-//#include "LocalVector.hpp"
-//#include "cppjieba/QuerySegment.hpp"
-#include "cppjieba/KeywordExtractor.hpp"
-#include <QVector>
-#include <QString>
-#include <QDebug>
-#include <QMutex>
-
-struct SKeyWord {
-    std::string word;
-    QVector<size_t> offsets;
-    double weight;
-    ~SKeyWord() {
-        word = std::move("");
-        offsets.clear();
-        offsets.shrink_to_fit();
-    }
-};
-
+#include "common-struct.h"
+
+class ChineseSegmentationPrivate;
 class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
 public:
     static ChineseSegmentation *getInstance();
-    QVector<SKeyWord> callSegement(std::string s);
-    std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
+    /**
+     * @brief ChineseSegmentation::callSegment
+     * Runs keyword extraction with the extractor: the sentence is first segmented in Mix mode,
+     * then keywords are extracted with the IDF dictionary; only keywords of two or more characters are kept.
+     *
+     * @param sentence the sentence to extract keywords from
+     * @return vector<KeyWord> container holding the information of the extracted keywords
+     */
+    vector<KeyWord> callSegment(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutStr
+     * Segments with the Mix method: the Max Probability (MP) method does a first pass and the
+     * Hidden Markov Model (HMM) refines it, so both dictionary words and out-of-vocabulary words
+     * are cut accurately and the result is fairly precise.
+     *
+     * @param sentence the sentence to segment
+     * @return vector<string> container holding only the text of each segmented word
+     */
+    vector<string> callMixSegmentCutStr(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMixSegmentCutWord
+     * Same function as callMixSegmentCutStr.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each segmented word
+     */
+    vector<Word> callMixSegmentCutWord(const string& str);
+
+    /**
+     * @brief ChineseSegmentation::lookUpTagOfWord
+     * Looks up the part-of-speech tag of word.
+     * @param word the word whose part of speech is queried
+     * @return string the part-of-speech tag of word
+     */
+    string lookUpTagOfWord(const string& word);
+
+    /**
+     * @brief ChineseSegmentation::getTagOfWordsInSentence
+     * Segments with Mix and returns the part-of-speech tag of each word.
+     * @param sentence the sentence to segment
+     * @return vector<pair<string, string>> the text of each segmented word (first) and its part-of-speech tag (second)
+     */
+    vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
+
+    /**
+     * @brief ChineseSegmentation::callFullSegment
+     * Segments with the Full mode, which cuts out every word found in the dictionary.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each segmented word
+     */
+    vector<Word> callFullSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callQuerySegment
+     * Segments with the Query mode: Mix is applied first, then Full is applied to long words;
+     * the result is the most precise, but it also yields the largest number of words.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each segmented word
+     */
+    vector<Word> callQuerySegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callHMMSegment
+     * Segments with the Hidden Markov Model (HMM).
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each segmented word
+     */
+    vector<Word> callHMMSegment(const string& sentence);
+
+    /**
+     * @brief ChineseSegmentation::callMPSegment
+     * Segments with the Max Probability (MP) method.
+     * @param sentence the sentence to segment
+     * @return vector<Word> container holding the full information of each segmented word
+     */
+    vector<Word> callMPSegment(const string& sentence);
 
 private:
     explicit ChineseSegmentation();
-    ~ChineseSegmentation();
-    void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
+    ~ChineseSegmentation() = default;
+    ChineseSegmentation(const ChineseSegmentation&) = delete;
+    ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
 
 private:
-    static QMutex m_mutex;
-    cppjieba::Jieba *m_jieba;
+    ChineseSegmentationPrivate *d = nullptr;
 
 };
 
 #endif // CHINESESEGMENTATION_H
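To make the refactored facade concrete, here is a minimal usage sketch against the header above. It is illustrative only: it assumes libchinese-segmentation is linked, its headers are on the include path, and the sample sentence is arbitrary.

    #include "chinese-segmentation.h"
    #include <iostream>

    int main() {
        ChineseSegmentation *seg = ChineseSegmentation::getInstance();

        // Keyword extraction: Mix segmentation followed by IDF-based extraction.
        vector<KeyWord> keywords = seg->callSegment("这是一个简单的例子");
        for (const KeyWord &kw : keywords) {
            std::cout << kw.word << " (weight " << kw.weight << ")" << std::endl;
        }

        // Plain Mix segmentation, returning only the word strings.
        for (const string &w : seg->callMixSegmentCutStr("这是一个简单的例子")) {
            std::cout << w << " ";
        }
        std::cout << std::endl;
        return 0;
    }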
@ -0,0 +1,52 @@
+#ifndef COMMONSTRUCT_H
+#define COMMONSTRUCT_H
+
+#include <string>
+#include <vector>
+
+using namespace std;
+
+/**
+ * @brief The KeyWord struct
+ *
+ * @property word the content of keyword
+ * @property offsets the Unicode offsets, can be used to check the word pos in a sentence
+ * @property weight the weight of the keyword
+ */
+
+struct KeyWord {
+    string word;
+    vector<size_t> offsets;
+    double weight;
+    ~KeyWord() {
+        word = std::move("");
+        offsets.clear();
+        offsets.shrink_to_fit();
+    }
+};
+
+/**
+ * @brief The Word struct
+ *
+ * @property word the content of word
+ * @property offset the offset of the word(absolute pos, Chinese 3 , English 1), can be used to check the word pos in a sentence
+ * @property unicode_offset the Unicode offset of the word
+ * @property unicode_length the Unicode length of the word
+ */
+struct Word {
+    string word;
+    uint32_t offset;
+    uint32_t unicode_offset;
+    uint32_t unicode_length;
+    Word(const string& w, uint32_t o)
+        : word(w), offset(o) {
+    }
+    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
+        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+    }
+    ~Word() {
+        word = std::move("");
+    }
+}; // struct Word
+
+#endif // COMMONSTRUCT_H
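A short sketch of how these structs are consumed (illustrative only; it assumes the library is linked and the sample text is arbitrary). callMixSegmentCutWord() returns one Word per token, carrying the offsets documented above.

    #include "chinese-segmentation.h"
    #include <iostream>

    int main() {
        vector<Word> words =
            ChineseSegmentation::getInstance()->callMixSegmentCutWord("ukui搜索框架");
        for (const Word &w : words) {
            // offset counts 3 per Chinese character and 1 per English character,
            // while unicode_offset/unicode_length are expressed in code points
            // (per the struct comments above).
            std::cout << w.word << "  offset=" << w.offset
                      << "  unicode_offset=" << w.unicode_offset
                      << "  unicode_length=" << w.unicode_length << std::endl;
        }
        return 0;
    }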
@ -63,7 +63,7 @@ public:
         return false;
     }
 
-    bool isMultiTone(string &word) {
+    bool isMultiTone(const string &word) {
         if (qmap_chinese2pinyin.contains(QString::fromStdString(word)))
             return true;
 //        if (map_chinese2pinyin.contains(word))
@ -7,6 +7,7 @@
 #include <ostream>
 #include "limonp/LocalVector.hpp"
 #include "limonp/StringUtil.hpp"
+#include "common-struct.h"
 
 namespace cppjieba {
 
@ -15,24 +16,24 @@ using std::vector;
 
 typedef uint32_t Rune;
 
-struct KeyWord {
-    string word;
-    vector<size_t> offsets;
-    double weight;
-}; // struct Word
+//struct KeyWord {
+//    string word;
+//    vector<size_t> offsets;
+//    double weight;
+//}; // struct Word
 
-struct Word {
-    string word;
-    uint32_t offset;
-    uint32_t unicode_offset;
-    uint32_t unicode_length;
-    Word(const string& w, uint32_t o)
-        : word(w), offset(o) {
-    }
-    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
-        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
-    }
-}; // struct Word
+//struct Word {
+//    string word;
+//    uint32_t offset;
+//    uint32_t unicode_offset;
+//    uint32_t unicode_length;
+//    Word(const string& w, uint32_t o)
+//        : word(w), offset(o) {
+//    }
+//    Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
+//        : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
+//    }
+//}; // struct Word
 
 inline std::ostream& operator << (std::ostream& os, const Word& w) {
     return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
@ -0,0 +1 @@
+#include "chinese-segmentation.h"
@ -0,0 +1 @@
+#include "hanzi-to-pinyin.h"
@ -0,0 +1,29 @@
+#ifndef HANZITOPINYINPRIVATE_H
+#define HANZITOPINYINPRIVATE_H
+
+#include <QtCore/qglobal.h>
+#include "cppjieba/PinYinTrie.hpp"
+#include "hanzi-to-pinyin.h"
+
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+using namespace std;
+
+class PINYINMANAGER_EXPORT HanZiToPinYinPrivate
+{
+public:
+    HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr);
+    ~HanZiToPinYinPrivate();
+
+public:
+    template <typename T>
+    bool isMultiTone(T &&t) {return m_pinYinTrie->isMultiTone(std::forward<T>(t));}
+
+    bool contains(string &word);
+    int getResults(string word, QStringList &results);
+
+private:
+    cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
+    HanZiToPinYin *q = nullptr;
+};
+#endif // HANZITOPINYINPRIVATE_H
@ -0,0 +1,83 @@
+#include "hanzi-to-pinyin.h"
+#include "hanzi-to-pinyin-private.h"
+#include <mutex>
+
+HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr;
+std::once_flag g_singleFlag;
+
+
+
+bool HanZiToPinYinPrivate::contains(string &word)
+{
+    return m_pinYinTrie->contains(word);
+}
+
+int HanZiToPinYinPrivate::getResults(string word, QStringList &results)
+{
+    results.clear();
+    if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
+        return 0;
+    }
+    QString tmp;
+    if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
+        results.append(tmp);
+        return 0;
+    }
+    return -1;
+}
+
+HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent)
+{
+    const char * const PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
+    m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
+}
+
+HanZiToPinYinPrivate::~HanZiToPinYinPrivate()
+{
+    if (m_pinYinTrie){
+        delete m_pinYinTrie;
+        m_pinYinTrie = nullptr;
+    }
+}
+
+HanZiToPinYin * HanZiToPinYin::getInstance()
+{
+    call_once(g_singleFlag, []() {
+        g_pinYinManager = new HanZiToPinYin;
+    });
+    return g_pinYinManager;
+}
+
+bool HanZiToPinYin::contains(string &word)
+{
+    return d->contains(word);
+}
+
+bool HanZiToPinYin::isMultiTone(string &word)
+{
+    return d->isMultiTone(word);
+}
+
+bool HanZiToPinYin::isMultiTone(string &&word)
+{
+    return d->isMultiTone(word);
+}
+
+bool HanZiToPinYin::isMultiTone(const string &word)
+{
+    return d->isMultiTone(word);
+}
+
+bool HanZiToPinYin::isMultiTone(const string &&word)
+{
+    return d->isMultiTone(word);
+}
+
+int HanZiToPinYin::getResults(string word, QStringList &results)
+{
+    return d->getResults(word, results);
+}
+
+HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate)
+{
+}
@ -0,0 +1,53 @@
+#ifndef HANZITOPINYIN_H
+#define HANZITOPINYIN_H
+
+#include <QtCore/qglobal.h>
+//#include "cppjieba/PinYinTrie.hpp"
+#include <QStringList>
+#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
+
+using namespace std;
+
+class HanZiToPinYinPrivate;
+class PINYINMANAGER_EXPORT HanZiToPinYin
+{
+public:
+    static HanZiToPinYin * getInstance();
+
+public:
+    /**
+     * @brief HanZiToPinYin::isMultiTone checks whether a character is polyphonic (single characters only)
+     * @param word the character to check
+     * @return bool false if the character is not polyphonic or the input is not a single character
+     */
+    bool isMultiTone(string &word);
+    bool isMultiTone(string &&word);
+    bool isMultiTone(const string &word);
+    bool isMultiTone(const string &&word);
+
+    /**
+     * @brief HanZiToPinYin::contains checks whether a character has a pinyin entry (i.e. is in the database; single characters only)
+     * @param word the character to query
+     * @return bool false if the database does not contain the character or the input is not a single character
+     */
+    bool contains(string &word);
+
+    /**
+     * @brief HanZiToPinYin::getResults gets the pinyin of a character (single characters only)
+     * @param word the character whose pinyin is requested
+     * @param results the pinyin readings of word (polyphonic characters may have several); results is cleared on every call
+     * @return int 0 if a result was found, -1 otherwise
+     */
+    int getResults(string word, QStringList &results);
+
+protected:
+    HanZiToPinYin();
+    ~HanZiToPinYin();
+    HanZiToPinYin(const HanZiToPinYin&) = delete;
+    HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
+private:
+    static HanZiToPinYin *g_pinYinManager;
+    HanZiToPinYinPrivate *d = nullptr;
+};
+
+#endif // PINYINMANAGER_H
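A small usage sketch for this interface follows (illustrative only; it assumes the library and QtCore are linked and the pinyin dictionary file is installed at the path used above, and the sample character is arbitrary).

    #include "hanzi-to-pinyin.h"
    #include <QDebug>

    int main() {
        HanZiToPinYin *py = HanZiToPinYin::getInstance();
        std::string han = "长";   // a single character, as the interface requires
        if (py->contains(han)) {
            qDebug() << "polyphonic:" << py->isMultiTone(han);
            QStringList readings;
            if (py->getResults(han, readings) == 0) {   // 0 means a result was found
                qDebug() << "pinyin:" << readings;
            }
        }
        return 0;
    }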
@ -5,13 +5,16 @@ TARGET = chinese-segmentation
 TEMPLATE = lib
 DEFINES += LIBCHINESESEGMENTATION_LIBRARY
 
-CONFIG += c++11
+CONFIG += c++11 create_pc create_prl no_install_prl
 
 # The following define makes your compiler emit warnings if you use
 # any Qt feature that has been marked deprecated (the exact warnings
 # depend on your compiler). Please consult the documentation of the
 # deprecated API in order to know how to port your code away from it.
 DEFINES += QT_DEPRECATED_WARNINGS
+QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
+#QMAKE_CXXFLAGS += -Werror=uninitialized
+QMAKE_CXXFLAGS += -execution-charset:utf-8
 
 # You can also make your code fail to compile if it uses deprecated APIs.
 # In order to do so, uncomment the following line.
@ -23,12 +26,15 @@ include(cppjieba/cppjieba.pri)
 
 SOURCES += \
         chinese-segmentation.cpp \
-        pinyinmanager.cpp
+        hanzi-to-pinyin.cpp
 
 HEADERS += \
+        chinese-segmentation-private.h \
         chinese-segmentation.h \
-        libchinese-segmentation_global.h \
-        pinyinmanager.h
+        common-struct.h \
+        hanzi-to-pinyin-private.h \
+        hanzi-to-pinyin.h \
+        libchinese-segmentation_global.h
 
 dict_files.path = /usr/share/ukui-search/res/dict/
 dict_files.files = $$PWD/dict/*.utf8\
@ -41,14 +47,24 @@ INSTALLS += \
 # Default rules for deployment.
 unix {
     target.path = $$[QT_INSTALL_LIBS]
-}
+    QMAKE_PKGCONFIG_NAME = chinese-segmentation
+    QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
+    QMAKE_PKGCONFIG_VERSION = $$VERSION
+    QMAKE_PKGCONFIG_LIBDIR = $$target.path
+    QMAKE_PKGCONFIG_DESTDIR = pkgconfig
+    QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
+    QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
 
 !isEmpty(target.path): INSTALLS += target
 
-header.path = /usr/include/chinese-seg/
-header.files += *.h
-headercppjieba.path = /usr/include/chinese-seg/cppjieba/
-headercppjieba.files = cppjieba/*
-INSTALLS += header headercppjieba
+header.path = /usr/include/chinese-seg
+header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h
+header.files += development-files/header-files/*
+# headercppjieba.path = /usr/include/chinese-seg/cppjieba/
+# headercppjieba.files = cppjieba/*
+INSTALLS += header
+}
+
+
 #DISTFILES += \
 # jiaba/jieba.pri
@ -64,5 +80,5 @@ DISTFILES += \
     dict/pos_dict/prob_trans.utf8 \
     dict/stop_words.utf8 \
     dict/user.dict.utf8 \
-    dict/pinyinWithoutTone.txt
+    dict/pinyinWithoutTone.txt \
+    development-files/header-files/* \
@ -1,55 +0,0 @@
-#include "pinyinmanager.h"
-#include <mutex>
-PinYinManager * PinYinManager::g_pinYinManager = nullptr;
-std::once_flag g_singleFlag;
-PinYinManager * PinYinManager::getInstance()
-{
-    call_once(g_singleFlag, []() {
-        g_pinYinManager = new PinYinManager;
-    });
-    return g_pinYinManager;
-}
-
-bool PinYinManager::contains(string &word)
-{
-    return m_pinYinTrie->contains(word);
-}
-
-bool PinYinManager::isMultiTon(string &word)
-{
-    return m_pinYinTrie->isMultiTone(word);
-}
-
-bool PinYinManager::isMultiTon(string word)
-{
-    return m_pinYinTrie->isMultiTone(word);
-}
-
-int PinYinManager::getResults(string word, QStringList &results)
-{
-    results.clear();
-    if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
-        return 0;
-    }
-    QString tmp;
-    if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
-        results.append(tmp);
-        return 0;
-    }
-    return -1;
-}
-
-PinYinManager::PinYinManager()
-{
-    const char * const PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
-    m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
-}
-
-PinYinManager::~PinYinManager()
-{
-    if (m_pinYinTrie){
-        delete m_pinYinTrie;
-        m_pinYinTrie = nullptr;
-    }
-}
-
@ -1,33 +0,0 @@
-#ifndef PINYINMANAGER_H
-#define PINYINMANAGER_H
-
-#include <QtCore/qglobal.h>
-#include "cppjieba/PinYinTrie.hpp"
-
-#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
-
-using namespace std;
-
-class PINYINMANAGER_EXPORT PinYinManager
-{
-public:
-    static PinYinManager * getInstance();
-
-public:
-    bool contains(string &word);
-    bool isMultiTon(string &word);
-    bool isMultiTon(string word);
-
-    int getResults(string word, QStringList &results);
-
-protected:
-    PinYinManager();
-    ~PinYinManager();
-
-private:
-    static PinYinManager *g_pinYinManager;
-    cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
-
-};
-
-#endif // PINYINMANAGER_H
@ -27,7 +27,7 @@
 #include <QDBusConnection>
 #include <QDomDocument>
 #include "gobject-template.h"
-#include "pinyinmanager.h"
+#include "hanzi-to-pinyin.h"
 
 using namespace UkuiSearch;
 size_t FileUtils::maxIndexCount = 0;
@ -413,14 +413,14 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
     int multiToneWordNum = 0;
 
     for (auto i:hanzi) {
-        if (PinYinManager::getInstance()->isMultiTon(QString(i).toStdString()))
+        if (HanZiToPinYin::getInstance()->isMultiTone(QString(i).toStdString()))
             ++multiToneWordNum;
     }
     if(multiToneWordNum > 3) {
         QString oneResult, oneResultFirst;
         for(auto i : hanzi) {
             QStringList results;
-            PinYinManager::getInstance()->getResults(QString(i).toStdString(), results);
+            HanZiToPinYin::getInstance()->getResults(QString(i).toStdString(), results);
             if(results.size()) {
                 oneResult += results.first();
                 oneResultFirst += results.first().at(0);
@ -435,7 +435,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
         }
 
         QStringList results;
-        PinYinManager::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
+        HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
         if(results.size()) {
             for(auto i : results) {
                 tempQueue.enqueue(i);
@ -447,7 +447,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
         }
         tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
         while(tempHanzi.size() != 0) {
-            PinYinManager::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
+            HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
             tempQueueSize = tempQueue.size();
             if(results.size()) {
                 for(int j = 0; j < tempQueueSize; ++j) {
@ -118,7 +118,7 @@ void ConstructDocumentForContent::run() {
     doc.setData(content);
     //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
     content = content.replace("\t", " ").replace("\xEF\xBC\x8C", "   ").replace("\xE3\x80\x82", "   ");
-    std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
+    std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
     for(size_t i = 0; i < term.size(); ++i) {
         doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
     }
@ -158,7 +158,7 @@ void ConstructDocumentForOcr::run()
     doc.setData(content);
     //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
     content = content.replace("\t", " ").replace("\xEF\xBC\x8C", "   ").replace("\xE3\x80\x82", "   ");
-    std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.toStdString());
+    std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());
     for(size_t i = 0; i < term.size(); ++i) {
         doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
     }
@ -414,15 +414,15 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
     // Build the document used for the full-text index
     QString content;
     QStringList tmp;
-    QVector<SKeyWord> term;
-    SKeyWord skw;
+    std::vector<KeyWord> term;
+    KeyWord skw;
     Document doc;
     QString uniqueterm;
     QString upTerm;
     QString suffix;
     FileReader::getTextContent(path, content, suffix);
 
-    term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
+    term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());
 //    QStringList term = content.split("");
 
     doc.setData(content);
@ -272,7 +272,7 @@ int FileContentSearch::keywordSearchContent() {
     qp.set_default_op(Xapian::Query::OP_AND);
     qp.set_database(db);
 
-    QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
+    std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
     //Creat a query
     std::string words;
     for(int i = 0; i < sKeyWord.size(); i++) {
@ -446,7 +446,7 @@ int OcrSearch::keywordSearchOcr() {
     Xapian::QueryParser qp;
     qp.set_default_op(Xapian::Query::OP_AND);
     qp.set_database(db);
-    QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
+    std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
     //Creat a query
     std::string words;
     for(int i = 0; i < sKeyWord.size(); i++) {
@ -154,9 +154,9 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
 }
 
 void NoteSearch::run() {
-    QVector<SKeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
+    std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
     QStringList keywordList;
-    for (SKeyWord sKeyWord : sKeyWordVec) {
+    for (KeyWord sKeyWord : sKeyWordVec) {
         keywordList.append(QString::fromStdString(sKeyWord.word));
     }
     QDBusInterface qi("org.ukui.note", "/org/ukui/note", "org.ukui.note.interface", QDBusConnection::sessionBus());
@ -165,7 +165,7 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
     std::vector<Xapian::Query> v;
 
     for (const auto &keyword : m_searchController->getKeyword()) {
-        QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword.toStdString());
+        std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString());
 
         for(const auto & c : sKeyWord) {
             v.emplace_back(c.word);