中文分词接口统一处理;
This commit is contained in:
parent
308b9b642f
commit
0725b5b097
|
@ -1 +1 @@
|
|||
Subproject commit 124c661581e0f2e10a47d3cc237f7ade404f1bec
|
||||
Subproject commit bf085a4a188010fac212e0f80e133c777857552b
|
|
@ -41,9 +41,8 @@ bool fileContentIndexer::index()
|
|||
}
|
||||
|
||||
m_document.setData(content);
|
||||
// '\xEF\xBC\x8C' is "，" and "\xE3\x80\x82" is "。"; replace each with three " " to preserve the offset info.
|
||||
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
||||
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
|
||||
|
||||
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000));
|
||||
content.clear();
|
||||
content.squeeze();
|
||||
|
||||
|
|
|
@ -274,7 +274,7 @@ int FileContentSearch::keywordSearchContent() {
|
|||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||
//Create a query
|
||||
std::string words;
|
||||
for(size_t i = 0; i < sKeyWord.size(); i++) {
|
||||
|
@ -414,7 +414,7 @@ int OcrSearch::keywordSearchOcr() {
|
|||
Xapian::QueryParser qp;
|
||||
qp.set_default_op(Xapian::Query::OP_AND);
|
||||
qp.set_database(db);
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||
//Create a query
|
||||
std::string words;
|
||||
for(int i = 0; i < sKeyWord.size(); i++) {
|
||||
|
|
|
@ -180,7 +180,7 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
|
|||
}
|
||||
|
||||
void NoteSearch::run() {
|
||||
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
|
||||
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword);
|
||||
QStringList keywordList;
|
||||
for (KeyWord sKeyWord : sKeyWordVec) {
|
||||
keywordList.append(QString::fromStdString(sKeyWord.word));
|
||||
|
|
|
@ -179,8 +179,8 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
|
|||
{
|
||||
std::vector<Xapian::Query> v;
|
||||
|
||||
for (const auto &keyword : m_searchController->getKeyword()) {
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString());
|
||||
for (auto &keyword : m_searchController->getKeyword()) {
|
||||
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword);
|
||||
|
||||
for(const auto & c : sKeyWord) {
|
||||
v.emplace_back(c.word);
|
||||
|
|
Loading…
Reference in New Issue