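Optimize the segmentation library: construct the cppjieba::Jieba object once in a shared ChineseSegmentation instance instead of on every callSegement() call.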

This commit is contained in:
zhangpengfei 2021-01-04 14:35:04 +08:00
parent bcc226a650
commit 0f315b0f23
7 changed files with 33 additions and 20 deletions

View File

@ -1,11 +1,8 @@
#include "chinese-segmentation.h"
#include <QFileInfo>
static ChineseSegmentation *global_instance = nullptr;
ChineseSegmentation::ChineseSegmentation()
{
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
@ -13,19 +10,35 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
cppjieba::Jieba jieba(DICT_PATH,
m_jieba = new cppjieba::Jieba(DICT_PATH,
HMM_PATH,
USER_DICT_PATH,
IDF_PATH,
STOP_WORD_PATH);
STOP_WORD_PATH);
}
// Destroys the cppjieba::Jieba object referenced by m_jieba.
ChineseSegmentation::~ChineseSegmentation()
{
    // Deleting a null pointer is a no-op, so no explicit null test is required.
    delete m_jieba;
}
// Returns the object stored in global_instance, allocating it on the first call.
// NOTE(review): the allocation is not synchronized here — confirm that the
// first call cannot be made from several threads at the same time.
ChineseSegmentation *ChineseSegmentation::getInstance()
{
    if (global_instance == nullptr)
        global_instance = new ChineseSegmentation;

    return global_instance;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{
std::string s;
s=str->toStdString();
const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
jieba.extractor.Extract(s, keywordres, topk);
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
QVector<SKeyWord> vecNeeds;
convert(keywordres, vecNeeds);

View File

@ -20,10 +20,13 @@ struct SKeyWord{
// Wrapper around a cppjieba::Jieba object used for keyword extraction.
// NOTE(review): this listing appears to interleave declarations from before
// and after a change — callSegement/convert and the constructor each appear
// twice. The static pair and the first constructor line look like the
// superseded versions; confirm against the actual header.
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
{
public:
static QVector<SKeyWord> callSegement(QString *str);
static void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
// Accessor for the ChineseSegmentation object; callers use
// getInstance()->callSegement(...).
static ChineseSegmentation *getInstance();
// Runs the cppjieba keyword extractor over *str and returns SKeyWord entries.
QVector<SKeyWord> callSegement(QString *str);
// Presumably fills kw from the extractor results in keywordres — body not
// shown here, verify against the implementation.
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
private:
ChineseSegmentation();
// Jieba object used by callSegement() for extraction.
cppjieba::Jieba *m_jieba;
// Constructor and destructor are private; getInstance() is the visible way
// to obtain an object of this class.
explicit ChineseSegmentation();
~ChineseSegmentation();
};
#endif // CHINESESEGMENTATION_H

View File

@ -87,7 +87,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
//Create a query
std::string words;
for(int i=0;i<sKeyWord.size();i++)

View File

@ -21,7 +21,7 @@ Q_SIGNALS:
private:
const QVector<QString> targetFileTypeVec ={
// QString(".doc"),
// QString(".docx"),
QString(".docx"),
// QString(".ppt"),
// QString(".pptx"),
// QString(".xls"),

View File

@ -216,7 +216,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
// Build the document for the text-content index
QString *content = FileReader::getTextContent(path);
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
Document doc;
doc.setData(*content);
doc.setUniqueTerm(uniqueterm);

View File

@ -2,9 +2,6 @@
<qresource prefix="/">
<file>res/icons/commonuse.svg</file>
<file>res/icons/edit-find-symbolic.svg</file>
<file>res/translations/bo.ts</file>
<file>res/translations/tr.ts</file>
<file>res/translations/zh_CN.ts</file>
<file>res/icons/desktop.png</file>
<file>res/icons/close.svg</file>
</qresource>

View File

@ -39,8 +39,8 @@ HEADERS += \
settings-widget.h
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
target.path = /usr/bin
!isEmpty(target.path): INSTALLS += target
RESOURCES += \
@ -52,7 +52,7 @@ TRANSLATIONS += \
res/translations/bo.ts
qm_files.path = /usr/share/ukui-search/res/translations/
qm_files.files = res/translations/*.qm\
qm_files.files = $$OUT_PWD/res/translations/*.qm
INSTALLS += \
qm_files \