From 0f315b0f239721d3dd939ff63895697c277b8d5c Mon Sep 17 00:00:00 2001 From: zhangpengfei Date: Mon, 4 Jan 2021 14:35:04 +0800 Subject: [PATCH] Optimized segment lib. --- .../chinese-segmentation.cpp | 29 ++++++++++++++----- .../chinese-segmentation.h | 9 ++++-- libsearch/index/file-searcher.cpp | 2 +- libsearch/index/filetypefilter.h | 2 +- libsearch/index/index-generator.cpp | 2 +- src/resource.qrc | 3 -- src/src.pro | 6 ++-- 7 files changed, 33 insertions(+), 20 deletions(-) diff --git a/libchinese-segmentation/chinese-segmentation.cpp b/libchinese-segmentation/chinese-segmentation.cpp index 0f22d87..aa615fb 100644 --- a/libchinese-segmentation/chinese-segmentation.cpp +++ b/libchinese-segmentation/chinese-segmentation.cpp @@ -1,11 +1,8 @@ #include "chinese-segmentation.h" #include +static ChineseSegmentation *global_instance = nullptr; ChineseSegmentation::ChineseSegmentation() -{ -} - -QVector ChineseSegmentation::callSegement(QString *str) { const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8"; const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8"; @@ -13,19 +10,35 @@ QVector ChineseSegmentation::callSegement(QString *str) const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8"; const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8"; - - cppjieba::Jieba jieba(DICT_PATH, + m_jieba = new cppjieba::Jieba(DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH, - STOP_WORD_PATH); + STOP_WORD_PATH); +} +ChineseSegmentation::~ChineseSegmentation() +{ + if(m_jieba) + delete m_jieba; +} + +ChineseSegmentation *ChineseSegmentation::getInstance() +{ + if (!global_instance) { + global_instance = new ChineseSegmentation; + } + return global_instance; +} + +QVector ChineseSegmentation::callSegement(QString *str) +{ std::string s; s=str->toStdString(); const size_t topk = -1; std::vector keywordres; - jieba.extractor.Extract(s, keywordres, topk); + ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk); QVector vecNeeds; convert(keywordres, vecNeeds); diff --git a/libchinese-segmentation/chinese-segmentation.h b/libchinese-segmentation/chinese-segmentation.h index 5e3f8eb..ef116e4 100644 --- a/libchinese-segmentation/chinese-segmentation.h +++ b/libchinese-segmentation/chinese-segmentation.h @@ -20,10 +20,13 @@ struct SKeyWord{ class CHINESESEGMENTATION_EXPORT ChineseSegmentation { public: - static QVector callSegement(QString *str); - static void convert(std::vector& keywordres,QVector& kw); + static ChineseSegmentation *getInstance(); + QVector callSegement(QString *str); + void convert(std::vector& keywordres,QVector& kw); private: - ChineseSegmentation(); + cppjieba::Jieba *m_jieba; + explicit ChineseSegmentation(); + ~ChineseSegmentation(); }; #endif // CHINESESEGMENTATION_H diff --git a/libsearch/index/file-searcher.cpp b/libsearch/index/file-searcher.cpp index 9da3860..d4ea1bb 100644 --- a/libsearch/index/file-searcher.cpp +++ b/libsearch/index/file-searcher.cpp @@ -87,7 +87,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num) qp.set_default_op(Xapian::Query::OP_AND); qp.set_database(db); - QVector sKeyWord = ChineseSegmentation::callSegement(&keyword); + QVector sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword); //Creat a query std::string words; for(int i=0;i targetFileTypeVec ={ // QString(".doc"), -// QString(".docx"), + QString(".docx"), // QString(".ppt"), // QString(".pptx"), // QString(".xls"), diff --git a/libsearch/index/index-generator.cpp b/libsearch/index/index-generator.cpp index 32642dc..e63ed88 100644 --- a/libsearch/index/index-generator.cpp +++ b/libsearch/index/index-generator.cpp @@ -216,7 +216,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) // 构造文本索引的document QString *content = FileReader::getTextContent(path); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path)); - QVector term = ChineseSegmentation::callSegement(content); + QVector term = ChineseSegmentation::getInstance()->callSegement(content); Document doc; doc.setData(*content); doc.setUniqueTerm(uniqueterm); diff --git a/src/resource.qrc b/src/resource.qrc index df637d4..37060e7 100644 --- a/src/resource.qrc +++ b/src/resource.qrc @@ -2,9 +2,6 @@ res/icons/commonuse.svg res/icons/edit-find-symbolic.svg - res/translations/bo.ts - res/translations/tr.ts - res/translations/zh_CN.ts res/icons/desktop.png res/icons/close.svg diff --git a/src/src.pro b/src/src.pro index 6f5792e..4159a65 100644 --- a/src/src.pro +++ b/src/src.pro @@ -39,8 +39,8 @@ HEADERS += \ settings-widget.h # Default rules for deployment. -qnx: target.path = /tmp/$${TARGET}/bin -else: unix:!android: target.path = /opt/$${TARGET}/bin + +target.path = /usr/bin !isEmpty(target.path): INSTALLS += target RESOURCES += \ @@ -52,7 +52,7 @@ TRANSLATIONS += \ res/translations/bo.ts qm_files.path = /usr/share/ukui-search/res/translations/ -qm_files.files = res/translations/*.qm\ +qm_files.files = $$OUT_PWD/res/translations/*.qm INSTALLS += \ qm_files \