Encapsulate the basic interface of cppjieba and hanzi to pinyin.

Use the perfect forword to optimize the 'isMultiTone' interface.
This commit is contained in:
baijunjie 2022-05-17 15:33:19 +08:00
parent 76c1a7f73b
commit 7daa82e66f
21 changed files with 508 additions and 202 deletions

2
debian

@ -1 +1 @@
Subproject commit 965e490d6438d89d477afb1f9a2cdd93f3a5d401
Subproject commit b20439faa51ad6179ea825befbcf4e99a5ebc002

View File

@ -0,0 +1,33 @@
#ifndef CHINESESEGMENTATIONPRIVATE_H
#define CHINESESEGMENTATIONPRIVATE_H
#include "chinese-segmentation.h"
#include "cppjieba/Jieba.hpp"
#include "cppjieba/KeywordExtractor.hpp"
class ChineseSegmentationPrivate
{
public:
explicit ChineseSegmentationPrivate(ChineseSegmentation *parent = nullptr);
~ChineseSegmentationPrivate();
vector<KeyWord> callSegment(const string& sentence);
vector<string> callMixSegmentCutStr(const string& sentence);
vector<Word> callMixSegmentCutWord(const string& sentence);
string lookUpTagOfWord(const string& word);
vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
vector<Word> callFullSegment(const string& sentence);
vector<Word> callQuerySegment(const string& sentence);
vector<Word> callHMMSegment(const string& sentence);
vector<Word> callMPSegment(const string& sentence);
private:
cppjieba::Jieba *m_jieba;
ChineseSegmentation *q = nullptr;
};
#endif // CHINESESEGMENTATIONPRIVATE_H

View File

@ -19,12 +19,10 @@
*
*/
#include "chinese-segmentation.h"
#include <QFileInfo>
#include <QDebug>
static ChineseSegmentation *global_instance_chinese_segmentation = nullptr;
QMutex ChineseSegmentation::m_mutex;
#include "chinese-segmentation-private.h"
ChineseSegmentation::ChineseSegmentation() {
ChineseSegmentationPrivate::ChineseSegmentationPrivate(ChineseSegmentation *parent) : q(parent)
{
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
@ -38,53 +36,127 @@ ChineseSegmentation::ChineseSegmentation() {
"");
}
ChineseSegmentation::~ChineseSegmentation() {
ChineseSegmentationPrivate::~ChineseSegmentationPrivate() {
if(m_jieba)
delete m_jieba;
m_jieba = nullptr;
}
ChineseSegmentation *ChineseSegmentation::getInstance() {
QMutexLocker locker(&m_mutex);
if(!global_instance_chinese_segmentation) {
global_instance_chinese_segmentation = new ChineseSegmentation;
}
return global_instance_chinese_segmentation;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
// std::string s;
// s = str.toStdString();
// str.squeeze();
vector<KeyWord> ChineseSegmentationPrivate::callSegment(const string &sentence) {
const size_t topk = -1;
std::vector<cppjieba::KeyWord> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
std::string().swap(s);
QVector<SKeyWord> vecNeeds;
convert(keywordres, vecNeeds);
vector<KeyWord> keywordres;
ChineseSegmentationPrivate::m_jieba->extractor.Extract(sentence, keywordres, topk);
keywordres.clear();
// keywordres.shrink_to_fit();
return vecNeeds;
return keywordres;
}
std::vector<cppjieba::KeyWord> ChineseSegmentation::callSegementStd(const std::string &str) {
const size_t topk = -1;
std::vector<cppjieba::KeyWord> keywordres;
ChineseSegmentation::m_jieba->extractor.Extract(str, keywordres, topk);
vector<string> ChineseSegmentationPrivate::callMixSegmentCutStr(const string &sentence)
{
vector<string> keywordres;
ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
return keywordres;
}
void ChineseSegmentation::convert(std::vector<cppjieba::KeyWord> &keywordres, QVector<SKeyWord> &kw) {
for(auto i : keywordres) {
SKeyWord temp;
temp.word = i.word;
temp.offsets = QVector<size_t>::fromStdVector(i.offsets);
temp.weight = i.weight;
kw.append(temp);
vector<Word> ChineseSegmentationPrivate::callMixSegmentCutWord(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->Cut(sentence, keywordres);
return keywordres;
}
string ChineseSegmentationPrivate::lookUpTagOfWord(const string &word)
{
return ChineseSegmentationPrivate::m_jieba->LookupTag(word);
}
vector<pair<string, string>> ChineseSegmentationPrivate::getTagOfWordsInSentence(const string &sentence)
{
vector<pair<string, string>> words;
ChineseSegmentationPrivate::m_jieba->Tag(sentence, words);
return words;
}
vector<Word> ChineseSegmentationPrivate::callFullSegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutAll(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callQuerySegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutForSearch(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callHMMSegment(const string &sentence)
{
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutHMM(sentence, keywordres);
return keywordres;
}
vector<Word> ChineseSegmentationPrivate::callMPSegment(const string &sentence)
{
size_t maxWordLen = 512;
vector<Word> keywordres;
ChineseSegmentationPrivate::m_jieba->CutSmall(sentence, keywordres, maxWordLen);
return keywordres;
}
ChineseSegmentation *ChineseSegmentation::getInstance()
{
static ChineseSegmentation *global_instance_chinese_segmentation = new ChineseSegmentation;
return global_instance_chinese_segmentation;
}
vector<KeyWord> ChineseSegmentation::callSegment(const string &sentence)
{
return d->callSegment(sentence);
}
vector<string> ChineseSegmentation::callMixSegmentCutStr(const string &sentence)
{
return d->callMixSegmentCutStr(sentence);
}
vector<Word> ChineseSegmentation::callMixSegmentCutWord(const string &str)
{
return d->callMixSegmentCutWord(str);
}
string ChineseSegmentation::lookUpTagOfWord(const string &word)
{
return d->lookUpTagOfWord(word);
}
vector<pair<string, string> > ChineseSegmentation::getTagOfWordsInSentence(const string &sentence)
{
return d->getTagOfWordsInSentence(sentence);
}
vector<Word> ChineseSegmentation::callFullSegment(const string &sentence)
{
return d->callFullSegment(sentence);
}
vector<Word> ChineseSegmentation::callQuerySegment(const string &sentence)
{
return d->callQuerySegment(sentence);
}
vector<Word> ChineseSegmentation::callHMMSegment(const string &sentence)
{
return d->callHMMSegment(sentence);
}
vector<Word> ChineseSegmentation::callMPSegment(const string &sentence)
{
return d->callMPSegment(sentence);
}
ChineseSegmentation::ChineseSegmentation() : d(new ChineseSegmentationPrivate)
{
}

View File

@ -22,42 +22,95 @@
#define CHINESESEGMENTATION_H
#include "libchinese-segmentation_global.h"
#include "cppjieba/Jieba.hpp"
//#include "Logging.hpp"
//#include "LocalVector.hpp"
//#include "cppjieba/QuerySegment.hpp"
#include "cppjieba/KeywordExtractor.hpp"
#include <QVector>
#include <QString>
#include <QDebug>
#include <QMutex>
struct SKeyWord {
std::string word;
QVector<size_t> offsets;
double weight;
~SKeyWord() {
word = std::move("");
offsets.clear();
offsets.shrink_to_fit();
}
};
#include "common-struct.h"
class ChineseSegmentationPrivate;
class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
public:
static ChineseSegmentation *getInstance();
QVector<SKeyWord> callSegement(std::string s);
std::vector<cppjieba::KeyWord> callSegementStd(const std::string& str);
/**
* @brief ChineseSegmentation::callSegment
* extractor进行关键词提取使Mix方式初步分词使Idf词典进行关键词提取
*
* @param sentence
* @return vector<KeyWord>
*/
vector<KeyWord> callSegment(const string &sentence);
/**
* @brief ChineseSegmentation::callMixSegmentCutStr
* 使Mix方法进行分词使MP初步分词HMM进一步分词
*
* @param sentence
* @return vector<string>
*/
vector<string> callMixSegmentCutStr(const string& sentence);
/**
* @brief ChineseSegmentation::callMixSegmentCutWord
* callMixSegmentCutStr功能相同
* @param sentence
* @return vector<Word>
*/
vector<Word> callMixSegmentCutWord(const string& str);
/**
* @brief ChineseSegmentation::lookUpTagOfWord
* word的词性
* @param word
* @return string word的词性
*/
string lookUpTagOfWord(const string& word);
/**
* @brief ChineseSegmentation::getTagOfWordsInSentence
* 使Mix分词后获取每个词的词性
* @param sentence
* @return vector<pair<string, string>> (firsr)(second)
*/
vector<pair<string, string>> getTagOfWordsInSentence(const string &sentence);
/**
* @brief ChineseSegmentation::callFullSegment
* 使Full进行分词Full会切出字典里所有的词
* @param sentence
* @return vector<Word>
*/
vector<Word> callFullSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callQuerySegment
* 使Query进行分词使MixFull
* @param sentence
* @return vector<Word>
*/
vector<Word> callQuerySegment(const string& sentence);
/**
* @brief ChineseSegmentation::callHMMSegment
* 使HMM进行分词
* @param sentence
* @return vector<Word>
*/
vector<Word> callHMMSegment(const string& sentence);
/**
* @brief ChineseSegmentation::callMPSegment
* 使MP进行分词
* @param sentence
* @return vector<Word>
*/
vector<Word> callMPSegment(const string& sentence);
private:
explicit ChineseSegmentation();
~ChineseSegmentation();
void convert(std::vector<cppjieba::KeyWord>& keywordres, QVector<SKeyWord>& kw);
~ChineseSegmentation() = default;
ChineseSegmentation(const ChineseSegmentation&) = delete;
ChineseSegmentation& operator =(const ChineseSegmentation&) = delete;
private:
static QMutex m_mutex;
cppjieba::Jieba *m_jieba;
ChineseSegmentationPrivate *d = nullptr;
};
#endif // CHINESESEGMENTATION_H

View File

@ -0,0 +1,52 @@
#ifndef COMMONSTRUCT_H
#define COMMONSTRUCT_H
#include <string>
#include <vector>
using namespace std;
/**
* @brief The KeyWord struct
*
* @property word the content of keyword
* @property offsets the Unicode offsets, can be used to check the word pos in a sentence
* @property weight the weight of the keyword
*/
struct KeyWord {
string word;
vector<size_t> offsets;
double weight;
~KeyWord() {
word = std::move("");
offsets.clear();
offsets.shrink_to_fit();
}
};
/**
* @brief The Word struct
*
* @property word the content of word
* @property offset the offset of the word(absolute pos, Chinese 3 , English 1) can be used to check the word pos in a sentence
* @property unicode_offset the Unicode offset of the word
* @property unicode_length the Unicode length of the word
*/
struct Word {
string word;
uint32_t offset;
uint32_t unicode_offset;
uint32_t unicode_length;
Word(const string& w, uint32_t o)
: word(w), offset(o) {
}
Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
: word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
~Word() {
word = std::move("");
}
}; // struct Word
#endif // COMMONSTRUCT_H

View File

@ -63,7 +63,7 @@ public:
return false;
}
bool isMultiTone(string &word) {
bool isMultiTone(const string &word) {
if (qmap_chinese2pinyin.contains(QString::fromStdString(word)))
return true;
// if (map_chinese2pinyin.contains(word))

View File

@ -7,6 +7,7 @@
#include <ostream>
#include "limonp/LocalVector.hpp"
#include "limonp/StringUtil.hpp"
#include "common-struct.h"
namespace cppjieba {
@ -15,24 +16,24 @@ using std::vector;
typedef uint32_t Rune;
struct KeyWord {
string word;
vector<size_t> offsets;
double weight;
}; // struct Word
//struct KeyWord {
// string word;
// vector<size_t> offsets;
// double weight;
//}; // struct Word
struct Word {
string word;
uint32_t offset;
uint32_t unicode_offset;
uint32_t unicode_length;
Word(const string& w, uint32_t o)
: word(w), offset(o) {
}
Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
: word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
}
}; // struct Word
//struct Word {
// string word;
// uint32_t offset;
// uint32_t unicode_offset;
// uint32_t unicode_length;
// Word(const string& w, uint32_t o)
// : word(w), offset(o) {
// }
// Word(const string& w, uint32_t o, uint32_t unicode_offset, uint32_t unicode_length)
// : word(w), offset(o), unicode_offset(unicode_offset), unicode_length(unicode_length) {
// }
//}; // struct Word
inline std::ostream& operator << (std::ostream& os, const Word& w) {
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";

View File

@ -0,0 +1 @@
#include "chinese-segmentation.h"

View File

@ -0,0 +1 @@
#include "hanzi-to-pinyin.h"

View File

@ -0,0 +1,29 @@
#ifndef HANZITOPINYINPRIVATE_H
#define HANZITOPINYINPRIVATE_H
#include <QtCore/qglobal.h>
#include "cppjieba/PinYinTrie.hpp"
#include "hanzi-to-pinyin.h"
#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
using namespace std;
class PINYINMANAGER_EXPORT HanZiToPinYinPrivate
{
public:
HanZiToPinYinPrivate(HanZiToPinYin *parent = nullptr);
~HanZiToPinYinPrivate();
public:
template <typename T>
bool isMultiTone(T &&t) {return m_pinYinTrie->isMultiTone(std::forward<T>(t));}
bool contains(string &word);
int getResults(string word, QStringList &results);
private:
cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
HanZiToPinYin *q = nullptr;
};
#endif // HANZITOPINYINPRIVATE_H

View File

@ -0,0 +1,83 @@
#include "hanzi-to-pinyin.h"
#include "hanzi-to-pinyin-private.h"
#include <mutex>
HanZiToPinYin * HanZiToPinYin::g_pinYinManager = nullptr;
std::once_flag g_singleFlag;
bool HanZiToPinYinPrivate::contains(string &word)
{
return m_pinYinTrie->contains(word);
}
int HanZiToPinYinPrivate::getResults(string word, QStringList &results)
{
results.clear();
if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
return 0;
}
QString tmp;
if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
results.append(tmp);
return 0;
}
return -1;
}
HanZiToPinYinPrivate::HanZiToPinYinPrivate(HanZiToPinYin *parent) : q(parent)
{
const char * const PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
}
HanZiToPinYinPrivate::~HanZiToPinYinPrivate()
{
if (m_pinYinTrie){
delete m_pinYinTrie;
m_pinYinTrie = nullptr;
}
}
HanZiToPinYin * HanZiToPinYin::getInstance()
{
call_once(g_singleFlag, []() {
g_pinYinManager = new HanZiToPinYin;
});
return g_pinYinManager;
}
bool HanZiToPinYin::contains(string &word)
{
return d->contains(word);
}
bool HanZiToPinYin::isMultiTone(string &word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(string &&word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(const string &word)
{
return d->isMultiTone(word);
}
bool HanZiToPinYin::isMultiTone(const string &&word)
{
return d->isMultiTone(word);
}
int HanZiToPinYin::getResults(string word, QStringList &results)
{
return d->getResults(word, results);
}
HanZiToPinYin::HanZiToPinYin() : d(new HanZiToPinYinPrivate)
{
}

View File

@ -0,0 +1,53 @@
#ifndef HANZITOPINYIN_H
#define HANZITOPINYIN_H
#include <QtCore/qglobal.h>
//#include "cppjieba/PinYinTrie.hpp"
#include <QStringList>
#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
using namespace std;
class HanZiToPinYinPrivate;
class PINYINMANAGER_EXPORT HanZiToPinYin
{
public:
static HanZiToPinYin * getInstance();
public:
/**
* @brief HanZiToPinYin::isMultiTone
* @param word
* @return bool false
*/
bool isMultiTone(string &word);
bool isMultiTone(string &&word);
bool isMultiTone(const string &word);
bool isMultiTone(const string &&word);
/**
* @brief HanZiToPinYin::contains
* @param word
* @return bool false
*/
bool contains(string &word);
/**
* @brief HanZiToPinYin::getResults
* @param word
* @param results word的拼音列表results会清空
* @return int 0-1
*/
int getResults(string word, QStringList &results);
protected:
HanZiToPinYin();
~HanZiToPinYin();
HanZiToPinYin(const HanZiToPinYin&) = delete;
HanZiToPinYin& operator =(const HanZiToPinYin&) = delete;
private:
static HanZiToPinYin *g_pinYinManager;
HanZiToPinYinPrivate *d = nullptr;
};
#endif // PINYINMANAGER_H

View File

@ -5,13 +5,16 @@ TARGET = chinese-segmentation
TEMPLATE = lib
DEFINES += LIBCHINESESEGMENTATION_LIBRARY
CONFIG += c++11
CONFIG += c++11 create_pc create_prl no_install_prl
# The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
QMAKE_CXXFLAGS += -Werror=return-type -Werror=return-local-addr
#QMAKE_CXXFLAGS += -Werror=uninitialized
QMAKE_CXXFLAGS += -execution-charset:utf-8
# You can also make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
@ -23,12 +26,15 @@ include(cppjieba/cppjieba.pri)
SOURCES += \
chinese-segmentation.cpp \
pinyinmanager.cpp
hanzi-to-pinyin.cpp
HEADERS += \
chinese-segmentation-private.h \
chinese-segmentation.h \
libchinese-segmentation_global.h \
pinyinmanager.h
common-struct.h \
hanzi-to-pinyin-private.h \
hanzi-to-pinyin.h \
libchinese-segmentation_global.h
dict_files.path = /usr/share/ukui-search/res/dict/
dict_files.files = $$PWD/dict/*.utf8\
@ -41,14 +47,24 @@ INSTALLS += \
# Default rules for deployment.
unix {
target.path = $$[QT_INSTALL_LIBS]
}
QMAKE_PKGCONFIG_NAME = chinese-segmentation
QMAKE_PKGCONFIG_DESCRIPTION = chinese-segmentation Header files
QMAKE_PKGCONFIG_VERSION = $$VERSION
QMAKE_PKGCONFIG_LIBDIR = $$target.path
QMAKE_PKGCONFIG_DESTDIR = pkgconfig
QMAKE_PKGCONFIG_INCDIR = /usr/include/chinese-seg
QMAKE_PKGCONFIG_CFLAGS += -I/usr/include/chinese-seg
!isEmpty(target.path): INSTALLS += target
header.path = /usr/include/chinese-seg/
header.files += *.h
headercppjieba.path = /usr/include/chinese-seg/cppjieba/
headercppjieba.files = cppjieba/*
INSTALLS += header headercppjieba
header.path = /usr/include/chinese-seg
header.files += chinese-segmentation.h libchinese-segmentation_global.h common-struct.h hanzi-to-pinyin.h
header.files += development-files/header-files/*
# headercppjieba.path = /usr/include/chinese-seg/cppjieba/
# headercppjieba.files = cppjieba/*
INSTALLS += header
}
#DISTFILES += \
# jiaba/jieba.pri
@ -64,5 +80,5 @@ DISTFILES += \
dict/pos_dict/prob_trans.utf8 \
dict/stop_words.utf8 \
dict/user.dict.utf8 \
dict/pinyinWithoutTone.txt
dict/pinyinWithoutTone.txt \
development-files/header-files/* \

View File

@ -1,55 +0,0 @@
#include "pinyinmanager.h"
#include <mutex>
PinYinManager * PinYinManager::g_pinYinManager = nullptr;
std::once_flag g_singleFlag;
PinYinManager * PinYinManager::getInstance()
{
call_once(g_singleFlag, []() {
g_pinYinManager = new PinYinManager;
});
return g_pinYinManager;
}
bool PinYinManager::contains(string &word)
{
return m_pinYinTrie->contains(word);
}
bool PinYinManager::isMultiTon(string &word)
{
return m_pinYinTrie->isMultiTone(word);
}
bool PinYinManager::isMultiTon(string word)
{
return m_pinYinTrie->isMultiTone(word);
}
int PinYinManager::getResults(string word, QStringList &results)
{
results.clear();
if (-1 != m_pinYinTrie->getMultiTonResults(word, results)) {
return 0;
}
QString tmp;
if (-1 != m_pinYinTrie->getSingleTonResult(word, tmp)) {
results.append(tmp);
return 0;
}
return -1;
}
PinYinManager::PinYinManager()
{
const char * const PINYIN_PATH = "/usr/share/ukui-search/res/dict/pinyinWithoutTone.txt";
m_pinYinTrie = new cppjieba::PinYinTrie(PINYIN_PATH);
}
PinYinManager::~PinYinManager()
{
if (m_pinYinTrie){
delete m_pinYinTrie;
m_pinYinTrie = nullptr;
}
}

View File

@ -1,33 +0,0 @@
#ifndef PINYINMANAGER_H
#define PINYINMANAGER_H
#include <QtCore/qglobal.h>
#include "cppjieba/PinYinTrie.hpp"
#define PINYINMANAGER_EXPORT Q_DECL_IMPORT
using namespace std;
class PINYINMANAGER_EXPORT PinYinManager
{
public:
static PinYinManager * getInstance();
public:
bool contains(string &word);
bool isMultiTon(string &word);
bool isMultiTon(string word);
int getResults(string word, QStringList &results);
protected:
PinYinManager();
~PinYinManager();
private:
static PinYinManager *g_pinYinManager;
cppjieba::PinYinTrie *m_pinYinTrie = nullptr;
};
#endif // PINYINMANAGER_H

View File

@ -27,7 +27,7 @@
#include <QDBusConnection>
#include <QDomDocument>
#include "gobject-template.h"
#include "pinyinmanager.h"
#include "hanzi-to-pinyin.h"
using namespace UkuiSearch;
size_t FileUtils::maxIndexCount = 0;
@ -413,14 +413,14 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
int multiToneWordNum = 0;
for (auto i:hanzi) {
if (PinYinManager::getInstance()->isMultiTon(QString(i).toStdString()))
if (HanZiToPinYin::getInstance()->isMultiTone(QString(i).toStdString()))
++multiToneWordNum;
}
if(multiToneWordNum > 3) {
QString oneResult, oneResultFirst;
for(auto i : hanzi) {
QStringList results;
PinYinManager::getInstance()->getResults(QString(i).toStdString(), results);
HanZiToPinYin::getInstance()->getResults(QString(i).toStdString(), results);
if(results.size()) {
oneResult += results.first();
oneResultFirst += results.first().at(0);
@ -435,7 +435,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
}
QStringList results;
PinYinManager::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
if(results.size()) {
for(auto i : results) {
tempQueue.enqueue(i);
@ -447,7 +447,7 @@ void stitchMultiToneWordsBFSStackLess3(const QString &hanzi, QStringList &result
}
tempHanzi = tempHanzi.right(tempHanzi.size() - 1);
while(tempHanzi.size() != 0) {
PinYinManager::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
HanZiToPinYin::getInstance()->getResults(QString(tempHanzi.at(0)).toStdString(), results);
tempQueueSize = tempQueue.size();
if(results.size()) {
for(int j = 0; j < tempQueueSize; ++j) {

View File

@ -118,7 +118,7 @@ void ConstructDocumentForContent::run() {
doc.setData(content);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.left(20480000).toStdString());
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
for(size_t i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}
@ -158,7 +158,7 @@ void ConstructDocumentForOcr::run()
doc.setData(content);
//'\xEF\xBC\x8C' is "" "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<cppjieba::KeyWord> term = ChineseSegmentation::getInstance()->callSegementStd(content.toStdString());
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());
for(size_t i = 0; i < term.size(); ++i) {
doc.addPosting(term.at(i).word, term.at(i).offsets, static_cast<int>(term.at(i).weight));
}

View File

@ -414,15 +414,15 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
// 构造文本索引的document
QString content;
QStringList tmp;
QVector<SKeyWord> term;
SKeyWord skw;
std::vector<KeyWord> term;
KeyWord skw;
Document doc;
QString uniqueterm;
QString upTerm;
QString suffix;
FileReader::getTextContent(path, content, suffix);
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
term = ChineseSegmentation::getInstance()->callSegment(content.toStdString());
// QStringList term = content.split("");
doc.setData(content);

View File

@ -272,7 +272,7 @@ int FileContentSearch::keywordSearchContent() {
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
//Creat a query
std::string words;
for(int i = 0; i < sKeyWord.size(); i++) {
@ -446,7 +446,7 @@ int OcrSearch::keywordSearchOcr() {
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
//Creat a query
std::string words;
for(int i = 0; i < sKeyWord.size(); i++) {

View File

@ -154,9 +154,9 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
}
void NoteSearch::run() {
QVector<SKeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
QStringList keywordList;
for (SKeyWord sKeyWord : sKeyWordVec) {
for (KeyWord sKeyWord : sKeyWordVec) {
keywordList.append(QString::fromStdString(sKeyWord.word));
}
QDBusInterface qi("org.ukui.note", "/org/ukui/note", "org.ukui.note.interface", QDBusConnection::sessionBus());

View File

@ -165,7 +165,7 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
std::vector<Xapian::Query> v;
for (const auto &keyword : m_searchController->getKeyword()) {
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(keyword.toStdString());
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString());
for(const auto & c : sKeyWord) {
v.emplace_back(c.word);