Optimized segmentation library.

This commit is contained in:
zhangpengfei 2021-01-04 14:35:04 +08:00
parent bcc226a650
commit 0f315b0f23
7 changed files with 33 additions and 20 deletions

View File

@ -1,11 +1,8 @@
#include "chinese-segmentation.h" #include "chinese-segmentation.h"
#include <QFileInfo> #include <QFileInfo>
static ChineseSegmentation *global_instance = nullptr;
ChineseSegmentation::ChineseSegmentation() ChineseSegmentation::ChineseSegmentation()
{
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{ {
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8"; const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8"; const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
@ -13,19 +10,35 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8"; const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8"; const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
m_jieba = new cppjieba::Jieba(DICT_PATH,
cppjieba::Jieba jieba(DICT_PATH,
HMM_PATH, HMM_PATH,
USER_DICT_PATH, USER_DICT_PATH,
IDF_PATH, IDF_PATH,
STOP_WORD_PATH); STOP_WORD_PATH);
}
ChineseSegmentation::~ChineseSegmentation()
{
if(m_jieba)
delete m_jieba;
}
ChineseSegmentation *ChineseSegmentation::getInstance()
{
if (!global_instance) {
global_instance = new ChineseSegmentation;
}
return global_instance;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
{
std::string s; std::string s;
s=str->toStdString(); s=str->toStdString();
const size_t topk = -1; const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres; std::vector<cppjieba::KeywordExtractor::Word> keywordres;
jieba.extractor.Extract(s, keywordres, topk); ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
QVector<SKeyWord> vecNeeds; QVector<SKeyWord> vecNeeds;
convert(keywordres, vecNeeds); convert(keywordres, vecNeeds);

View File

@ -20,10 +20,13 @@ struct SKeyWord{
class CHINESESEGMENTATION_EXPORT ChineseSegmentation class CHINESESEGMENTATION_EXPORT ChineseSegmentation
{ {
public: public:
static QVector<SKeyWord> callSegement(QString *str); static ChineseSegmentation *getInstance();
static void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw); QVector<SKeyWord> callSegement(QString *str);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
private: private:
ChineseSegmentation(); cppjieba::Jieba *m_jieba;
explicit ChineseSegmentation();
~ChineseSegmentation();
}; };
#endif // CHINESESEGMENTATION_H #endif // CHINESESEGMENTATION_H

View File

@ -87,7 +87,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
qp.set_default_op(Xapian::Query::OP_AND); qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db); qp.set_database(db);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword); QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
//Create a query //Create a query
std::string words; std::string words;
for(int i=0;i<sKeyWord.size();i++) for(int i=0;i<sKeyWord.size();i++)

View File

@ -21,7 +21,7 @@ Q_SIGNALS:
private: private:
const QVector<QString> targetFileTypeVec ={ const QVector<QString> targetFileTypeVec ={
// QString(".doc"), // QString(".doc"),
// QString(".docx"), QString(".docx"),
// QString(".ppt"), // QString(".ppt"),
// QString(".pptx"), // QString(".pptx"),
// QString(".xls"), // QString(".xls"),

View File

@ -216,7 +216,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
// 构造文本索引的document // 构造文本索引的document
QString *content = FileReader::getTextContent(path); QString *content = FileReader::getTextContent(path);
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path)); QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content); QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
Document doc; Document doc;
doc.setData(*content); doc.setData(*content);
doc.setUniqueTerm(uniqueterm); doc.setUniqueTerm(uniqueterm);

View File

@ -2,9 +2,6 @@
<qresource prefix="/"> <qresource prefix="/">
<file>res/icons/commonuse.svg</file> <file>res/icons/commonuse.svg</file>
<file>res/icons/edit-find-symbolic.svg</file> <file>res/icons/edit-find-symbolic.svg</file>
<file>res/translations/bo.ts</file>
<file>res/translations/tr.ts</file>
<file>res/translations/zh_CN.ts</file>
<file>res/icons/desktop.png</file> <file>res/icons/desktop.png</file>
<file>res/icons/close.svg</file> <file>res/icons/close.svg</file>
</qresource> </qresource>

View File

@ -39,8 +39,8 @@ HEADERS += \
settings-widget.h settings-widget.h
# Default rules for deployment. # Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin target.path = /usr/bin
!isEmpty(target.path): INSTALLS += target !isEmpty(target.path): INSTALLS += target
RESOURCES += \ RESOURCES += \
@ -52,7 +52,7 @@ TRANSLATIONS += \
res/translations/bo.ts res/translations/bo.ts
qm_files.path = /usr/share/ukui-search/res/translations/ qm_files.path = /usr/share/ukui-search/res/translations/
qm_files.files = res/translations/*.qm\ qm_files.files = $$OUT_PWD/res/translations/*.qm
INSTALLS += \ INSTALLS += \
qm_files \ qm_files \