修复中文分词接口编译问题;
This commit is contained in:
parent
73cccc4083
commit
79527082f9
|
@ -41,9 +41,7 @@ bool fileContentIndexer::index()
|
|||
}
|
||||
|
||||
m_document.setData(content);
|
||||
// "\xEF\xBC\x8C" is "，" (fullwidth comma) and "\xE3\x80\x82" is "。" (ideographic full stop); use three " " to replace each of them, to preserve the offset info.
|
||||
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
||||
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
|
||||
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content);
|
||||
content.clear();
|
||||
content.squeeze();
|
||||
|
||||
|
|
Loading…
Reference in New Issue