Optimized segment lib.
This commit is contained in:
parent
bcc226a650
commit
0f315b0f23
|
@ -1,11 +1,8 @@
|
|||
#include "chinese-segmentation.h"
|
||||
#include <QFileInfo>
|
||||
static ChineseSegmentation *global_instance = nullptr;
|
||||
|
||||
ChineseSegmentation::ChineseSegmentation()
|
||||
{
|
||||
}
|
||||
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
||||
{
|
||||
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
||||
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
||||
|
@ -13,19 +10,35 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
|||
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
||||
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
||||
|
||||
|
||||
cppjieba::Jieba jieba(DICT_PATH,
|
||||
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
||||
HMM_PATH,
|
||||
USER_DICT_PATH,
|
||||
IDF_PATH,
|
||||
STOP_WORD_PATH);
|
||||
STOP_WORD_PATH);
|
||||
}
|
||||
|
||||
/**
 * Destroys the segmenter and frees the cppjieba::Jieba object held in m_jieba.
 */
ChineseSegmentation::~ChineseSegmentation()
{
    // `delete` on a null pointer is a no-op, so no explicit null test is needed.
    delete m_jieba;
}
|
||||
|
||||
/**
 * Returns the shared ChineseSegmentation object, constructing it on the
 * first call and handing back the same pointer on every later call.
 *
 * NOTE(review): no locking is performed here; if the first call can happen
 * from several threads at once, callers need to serialize it — confirm
 * against the call sites.
 *
 * @return pointer to the shared instance stored in global_instance.
 */
ChineseSegmentation *ChineseSegmentation::getInstance()
{
    // Fast path: the instance already exists.
    if (global_instance != nullptr) {
        return global_instance;
    }

    // First call: build the instance and remember it for later callers.
    global_instance = new ChineseSegmentation;
    return global_instance;
}
|
||||
|
||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
||||
{
|
||||
std::string s;
|
||||
s=str->toStdString();
|
||||
|
||||
const size_t topk = -1;
|
||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||
jieba.extractor.Extract(s, keywordres, topk);
|
||||
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
||||
QVector<SKeyWord> vecNeeds;
|
||||
convert(keywordres, vecNeeds);
|
||||
|
||||
|
|
|
@ -20,10 +20,13 @@ struct SKeyWord{
|
|||
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||
{
|
||||
public:
|
||||
static QVector<SKeyWord> callSegement(QString *str);
|
||||
static void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||
static ChineseSegmentation *getInstance();
|
||||
QVector<SKeyWord> callSegement(QString *str);
|
||||
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||
private:
|
||||
ChineseSegmentation();
|
||||
cppjieba::Jieba *m_jieba;
|
||||
explicit ChineseSegmentation();
|
||||
~ChineseSegmentation();
|
||||
};
|
||||
|
||||
#endif // CHINESESEGMENTATION_H
|
||||
|
|
|
@ -87,7 +87,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
|||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
|
||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
|
||||
// Create a query
|
||||
std::string words;
|
||||
for(int i=0;i<sKeyWord.size();i++)
|
||||
|
|
|
@ -21,7 +21,7 @@ Q_SIGNALS:
|
|||
private:
|
||||
const QVector<QString> targetFileTypeVec ={
|
||||
// QString(".doc"),
|
||||
// QString(".docx"),
|
||||
QString(".docx"),
|
||||
// QString(".ppt"),
|
||||
// QString(".pptx"),
|
||||
// QString(".xls"),
|
||||
|
|
|
@ -216,7 +216,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
|||
// 构造文本索引的document
|
||||
QString *content = FileReader::getTextContent(path);
|
||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
|
||||
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||
Document doc;
|
||||
doc.setData(*content);
|
||||
doc.setUniqueTerm(uniqueterm);
|
||||
|
|
|
@ -2,9 +2,6 @@
|
|||
<qresource prefix="/">
|
||||
<file>res/icons/commonuse.svg</file>
|
||||
<file>res/icons/edit-find-symbolic.svg</file>
|
||||
<file>res/translations/bo.ts</file>
|
||||
<file>res/translations/tr.ts</file>
|
||||
<file>res/translations/zh_CN.ts</file>
|
||||
<file>res/icons/desktop.png</file>
|
||||
<file>res/icons/close.svg</file>
|
||||
</qresource>
|
||||
|
|
|
@ -39,8 +39,8 @@ HEADERS += \
|
|||
settings-widget.h
|
||||
|
||||
# Default rules for deployment.
|
||||
qnx: target.path = /tmp/$${TARGET}/bin
|
||||
else: unix:!android: target.path = /opt/$${TARGET}/bin
|
||||
|
||||
target.path = /usr/bin
|
||||
!isEmpty(target.path): INSTALLS += target
|
||||
|
||||
RESOURCES += \
|
||||
|
@ -52,7 +52,7 @@ TRANSLATIONS += \
|
|||
res/translations/bo.ts
|
||||
|
||||
qm_files.path = /usr/share/ukui-search/res/translations/
|
||||
qm_files.files = res/translations/*.qm\
|
||||
qm_files.files = $$OUT_PWD/res/translations/*.qm
|
||||
|
||||
INSTALLS += \
|
||||
qm_files \
|
||||
|
|
Loading…
Reference in New Issue