2020-12-30 14:42:04 +08:00
|
|
|
#include "chinese-segmentation.h"
|
2020-12-31 21:14:13 +08:00
|
|
|
#include <QFileInfo>
|
2021-01-04 14:35:04 +08:00
|
|
|
// File-local pointer to the single shared ChineseSegmentation object.
// It stays null until ChineseSegmentation::getInstance() creates the object on first use.
static ChineseSegmentation *global_instance = nullptr;
|
2020-12-30 14:42:04 +08:00
|
|
|
|
|
|
|
/**
 * Creates the cppjieba engine used by this object, pointing it at the
 * dictionary files installed under /usr/share/ukui-search/res/dict.
 */
ChineseSegmentation::ChineseSegmentation()
{
    // Resource files handed to cppjieba::Jieba, listed in the order they are passed below.
    static const char mainDictFile[]  = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
    static const char hmmModelFile[]  = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
    static const char userDictFile[]  = "/usr/share/ukui-search/res/dict/user.dict.utf8";
    static const char idfFile[]       = "/usr/share/ukui-search/res/dict/idf.utf8";
    static const char stopWordsFile[] = "/usr/share/ukui-search/res/dict/stop_words.utf8";

    // The engine is heap-allocated here and released in the destructor.
    m_jieba = new cppjieba::Jieba(mainDictFile, hmmModelFile, userDictFile, idfFile, stopWordsFile);
}
|
2020-12-31 21:14:13 +08:00
|
|
|
|
2021-01-04 14:35:04 +08:00
|
|
|
/**
 * Releases the cppjieba engine held in m_jieba.
 */
ChineseSegmentation::~ChineseSegmentation()
{
    // Deleting a null pointer is a no-op, so no explicit null test is needed.
    delete m_jieba;
}
|
|
|
|
|
|
|
|
/**
 * Returns the shared ChineseSegmentation object, constructing it on the first call.
 *
 * The object is stored in the file-local global_instance pointer and is not
 * freed by this function. No locking is performed here.
 *
 * @return Pointer to the shared instance.
 */
ChineseSegmentation *ChineseSegmentation::getInstance()
{
    // Fast path: the instance already exists.
    if (global_instance != nullptr) {
        return global_instance;
    }

    global_instance = new ChineseSegmentation;
    return global_instance;
}
|
|
|
|
|
|
|
|
/**
 * Runs cppjieba's keyword extractor over a piece of text.
 *
 * @param str Text to analyse. A null pointer is tolerated and produces an
 *            empty result instead of being dereferenced.
 * @return One SKeyWord (word, offsets, weight) per keyword reported by the
 *         extractor; empty when str is null.
 */
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{
    QVector<SKeyWord> vecNeeds;

    // Guard against a null argument: there is nothing to segment.
    if (str == nullptr) {
        return vecNeeds;
    }

    const std::string s = str->toStdString();

    // Largest size_t value, passed as the extractor's limit argument.
    // NOTE(review): presumably this means "no top-N cut-off" — confirm against cppjieba.
    const size_t topk = static_cast<size_t>(-1);

    std::vector<cppjieba::KeywordExtractor::Word> keywordres;
    m_jieba->extractor.Extract(s, keywordres, topk);

    // Translate cppjieba's result type into the Qt-facing SKeyWord records.
    convert(keywordres, vecNeeds);
    return vecNeeds;
}
|
|
|
|
|
|
|
|
/**
 * Appends one SKeyWord to kw for every entry of keywordres, copying the
 * word, its offsets and its weight.
 *
 * @param keywordres Keyword records produced by cppjieba; not modified here.
 * @param kw         Destination vector; existing elements are kept and the
 *                   converted records are appended after them.
 */
void ChineseSegmentation::convert(std::vector<cppjieba::KeywordExtractor::Word> &keywordres, QVector<SKeyWord> &kw)
{
    // Grow the destination once instead of on every append.
    kw.reserve(kw.size() + static_cast<int>(keywordres.size()));

    // Iterate by const reference: each Word carries a string and an offsets
    // vector, so iterating by value would deep-copy every element.
    for (const auto &item : keywordres) {
        SKeyWord temp;
        temp.word = item.word;
        temp.offsets = QVector<size_t>::fromStdVector(item.offsets);
        temp.weight = item.weight;
        kw.append(temp);
    }
}
|