Optimized segment lib.
This commit is contained in:
parent
bcc226a650
commit
0f315b0f23
|
@ -1,11 +1,8 @@
|
||||||
#include "chinese-segmentation.h"
|
#include "chinese-segmentation.h"
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
|
static ChineseSegmentation *global_instance = nullptr;
|
||||||
|
|
||||||
ChineseSegmentation::ChineseSegmentation()
|
ChineseSegmentation::ChineseSegmentation()
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
|
||||||
{
|
{
|
||||||
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
const char * const DICT_PATH = "/usr/share/ukui-search/res/dict/jieba.dict.utf8";
|
||||||
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
const char * const HMM_PATH = "/usr/share/ukui-search/res/dict/hmm_model.utf8";
|
||||||
|
@ -13,19 +10,35 @@ QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
||||||
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
const char * const IDF_PATH = "/usr/share/ukui-search/res/dict/idf.utf8";
|
||||||
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
const char * const STOP_WORD_PATH = "/usr/share/ukui-search/res/dict/stop_words.utf8";
|
||||||
|
|
||||||
|
m_jieba = new cppjieba::Jieba(DICT_PATH,
|
||||||
cppjieba::Jieba jieba(DICT_PATH,
|
|
||||||
HMM_PATH,
|
HMM_PATH,
|
||||||
USER_DICT_PATH,
|
USER_DICT_PATH,
|
||||||
IDF_PATH,
|
IDF_PATH,
|
||||||
STOP_WORD_PATH);
|
STOP_WORD_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
ChineseSegmentation::~ChineseSegmentation()
|
||||||
|
{
|
||||||
|
if(m_jieba)
|
||||||
|
delete m_jieba;
|
||||||
|
}
|
||||||
|
|
||||||
|
ChineseSegmentation *ChineseSegmentation::getInstance()
|
||||||
|
{
|
||||||
|
if (!global_instance) {
|
||||||
|
global_instance = new ChineseSegmentation;
|
||||||
|
}
|
||||||
|
return global_instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
QVector<SKeyWord> ChineseSegmentation::callSegement(QString *str)
|
||||||
|
{
|
||||||
std::string s;
|
std::string s;
|
||||||
s=str->toStdString();
|
s=str->toStdString();
|
||||||
|
|
||||||
const size_t topk = -1;
|
const size_t topk = -1;
|
||||||
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
std::vector<cppjieba::KeywordExtractor::Word> keywordres;
|
||||||
jieba.extractor.Extract(s, keywordres, topk);
|
ChineseSegmentation::m_jieba->extractor.Extract(s, keywordres, topk);
|
||||||
QVector<SKeyWord> vecNeeds;
|
QVector<SKeyWord> vecNeeds;
|
||||||
convert(keywordres, vecNeeds);
|
convert(keywordres, vecNeeds);
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,13 @@ struct SKeyWord{
|
||||||
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
class CHINESESEGMENTATION_EXPORT ChineseSegmentation
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static QVector<SKeyWord> callSegement(QString *str);
|
static ChineseSegmentation *getInstance();
|
||||||
static void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
QVector<SKeyWord> callSegement(QString *str);
|
||||||
|
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres,QVector<SKeyWord>& kw);
|
||||||
private:
|
private:
|
||||||
ChineseSegmentation();
|
cppjieba::Jieba *m_jieba;
|
||||||
|
explicit ChineseSegmentation();
|
||||||
|
~ChineseSegmentation();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // CHINESESEGMENTATION_H
|
#endif // CHINESESEGMENTATION_H
|
||||||
|
|
|
@ -87,7 +87,7 @@ void FileSearcher::onKeywordSearchContent(QString keyword, int begin, int num)
|
||||||
qp.set_default_op(Xapian::Query::OP_AND);
|
qp.set_default_op(Xapian::Query::OP_AND);
|
||||||
qp.set_database(db);
|
qp.set_database(db);
|
||||||
|
|
||||||
QVector<SKeyWord> sKeyWord = ChineseSegmentation::callSegement(&keyword);
|
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(&keyword);
|
||||||
//Creat a query
|
//Creat a query
|
||||||
std::string words;
|
std::string words;
|
||||||
for(int i=0;i<sKeyWord.size();i++)
|
for(int i=0;i<sKeyWord.size();i++)
|
||||||
|
|
|
@ -21,7 +21,7 @@ Q_SIGNALS:
|
||||||
private:
|
private:
|
||||||
const QVector<QString> targetFileTypeVec ={
|
const QVector<QString> targetFileTypeVec ={
|
||||||
// QString(".doc"),
|
// QString(".doc"),
|
||||||
// QString(".docx"),
|
QString(".docx"),
|
||||||
// QString(".ppt"),
|
// QString(".ppt"),
|
||||||
// QString(".pptx"),
|
// QString(".pptx"),
|
||||||
// QString(".xls"),
|
// QString(".xls"),
|
||||||
|
|
|
@ -216,7 +216,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path)
|
||||||
// 构造文本索引的document
|
// 构造文本索引的document
|
||||||
QString *content = FileReader::getTextContent(path);
|
QString *content = FileReader::getTextContent(path);
|
||||||
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(path));
|
||||||
QVector<SKeyWord> term = ChineseSegmentation::callSegement(content);
|
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content);
|
||||||
Document doc;
|
Document doc;
|
||||||
doc.setData(*content);
|
doc.setData(*content);
|
||||||
doc.setUniqueTerm(uniqueterm);
|
doc.setUniqueTerm(uniqueterm);
|
||||||
|
|
|
@ -2,9 +2,6 @@
|
||||||
<qresource prefix="/">
|
<qresource prefix="/">
|
||||||
<file>res/icons/commonuse.svg</file>
|
<file>res/icons/commonuse.svg</file>
|
||||||
<file>res/icons/edit-find-symbolic.svg</file>
|
<file>res/icons/edit-find-symbolic.svg</file>
|
||||||
<file>res/translations/bo.ts</file>
|
|
||||||
<file>res/translations/tr.ts</file>
|
|
||||||
<file>res/translations/zh_CN.ts</file>
|
|
||||||
<file>res/icons/desktop.png</file>
|
<file>res/icons/desktop.png</file>
|
||||||
<file>res/icons/close.svg</file>
|
<file>res/icons/close.svg</file>
|
||||||
</qresource>
|
</qresource>
|
||||||
|
|
|
@ -39,8 +39,8 @@ HEADERS += \
|
||||||
settings-widget.h
|
settings-widget.h
|
||||||
|
|
||||||
# Default rules for deployment.
|
# Default rules for deployment.
|
||||||
qnx: target.path = /tmp/$${TARGET}/bin
|
|
||||||
else: unix:!android: target.path = /opt/$${TARGET}/bin
|
target.path = /usr/bin
|
||||||
!isEmpty(target.path): INSTALLS += target
|
!isEmpty(target.path): INSTALLS += target
|
||||||
|
|
||||||
RESOURCES += \
|
RESOURCES += \
|
||||||
|
@ -52,7 +52,7 @@ TRANSLATIONS += \
|
||||||
res/translations/bo.ts
|
res/translations/bo.ts
|
||||||
|
|
||||||
qm_files.path = /usr/share/ukui-search/res/translations/
|
qm_files.path = /usr/share/ukui-search/res/translations/
|
||||||
qm_files.files = res/translations/*.qm\
|
qm_files.files = $$OUT_PWD/res/translations/*.qm
|
||||||
|
|
||||||
INSTALLS += \
|
INSTALLS += \
|
||||||
qm_files \
|
qm_files \
|
||||||
|
|
Loading…
Reference in New Issue