// File: ukui-search/libchinese-segmentation/chinese-segmentation.cpp
// (Repository web-view residue, not part of the program: 59 lines, 1.7 KiB, C++;
//  history timestamp 2020-12-30 14:42:04 +08:00)

#include "chinese-segmentation.h"
#include <QFileInfo>
// (web-view history timestamp: 2021-01-04 14:35:04 +08:00)
// File-local pointer to the shared ChineseSegmentation object. It starts out
// null and is assigned by ChineseSegmentation::getInstance() on first use.
static ChineseSegmentation *global_instance = nullptr;
// (web-view history timestamp: 2020-12-30 14:42:04 +08:00)
/**
 * Creates the cppjieba::Jieba engine used by this object.
 *
 * The engine is constructed from five resource files located under
 * /usr/share/ukui-search/res/dict/ (main dictionary, HMM model, user
 * dictionary, IDF table and stop-word list). The resulting object is stored
 * in m_jieba and released by the destructor.
 */
ChineseSegmentation::ChineseSegmentation()
{
    const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
    const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
    const char * const USER_DICT_PATH = "/usr/share/ukui-search/res/dict/user.dict.utf8";
    const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
    const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";

    // Argument order matches the original call: dict, HMM, user dict, IDF, stop words.
    m_jieba = new cppjieba::Jieba(DICT_PATH,
                                  HMM_PATH,
                                  USER_DICT_PATH,
                                  IDF_PATH,
                                  STOP_WORD_PATH);
}
// (web-view history timestamp: 2021-01-04 14:35:04 +08:00)
/**
 * Releases the cppjieba::Jieba engine held in m_jieba.
 */
ChineseSegmentation::~ChineseSegmentation()
{
    // Applying delete to a null pointer is a no-op, so no explicit guard is required.
    delete m_jieba;
}
/**
 * Returns the shared ChineseSegmentation object, allocating it on the first call.
 *
 * The object is stored in the file-local global_instance pointer; no locking
 * is performed in this function.
 *
 * @return Pointer to the shared instance.
 */
ChineseSegmentation *ChineseSegmentation::getInstance()
{
    // Fast path: the instance has already been created.
    if (global_instance) {
        return global_instance;
    }

    global_instance = new ChineseSegmentation;
    return global_instance;
}
/**
 * Runs the cppjieba keyword extractor over the given text.
 *
 * @param str Text to analyse. A null pointer is tolerated and produces an
 *            empty result instead of being dereferenced.
 * @return The extracted keywords, converted to SKeyWord entries via convert().
 */
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{
    QVector<SKeyWord> vecNeeds;
    if (!str) {
        return vecNeeds;
    }

    // Largest representable size_t, spelled with an explicit cast instead of
    // the implicit conversion of -1. Presumably this asks the extractor for
    // every keyword rather than a top-k subset (confirm against cppjieba).
    const size_t topk = static_cast<size_t>(-1);

    const std::string s = str->toStdString();
    std::vector<cppjieba::KeywordExtractor::Word> keywordres;
    m_jieba->extractor.Extract(s, keywordres, topk);

    convert(keywordres, vecNeeds);
    return vecNeeds;
}
/**
 * Appends one SKeyWord to kw for every cppjieba keyword in keywordres.
 *
 * For each entry the word, offsets and weight fields are copied across;
 * the offsets are converted from std::vector to QVector.
 *
 * @param keywordres Keywords produced by the cppjieba extractor (not modified).
 * @param kw         Destination vector; existing elements are kept and the
 *                   converted keywords are appended after them.
 */
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw)
{
    // Grow the destination once instead of on every append.
    kw.reserve(kw.size() + static_cast<int>(keywordres.size()));

    // Iterate by const reference: each Word holds a string and an offsets
    // vector, so iterating by value would deep-copy every element.
    for (const auto &item : keywordres) {
        SKeyWord temp;
        temp.word = item.word;
        temp.offsets = QVector<size_t>::fromStdVector(item.offsets);
        temp.weight = item.weight;
        kw.append(temp);
    }
}