修复中文分词接口编译问题;

This commit is contained in:
jixiaoxu 2023-03-20 15:34:05 +08:00 committed by iaom
parent 73cccc4083
commit 79527082f9
1 changed file with 1 addition and 3 deletions

View File

@@ -41,9 +41,7 @@ bool fileContentIndexer::index()
}
m_document.setData(content);
//'\xEF\xBC\x8C' is "，" and "\xE3\x80\x82" is "。"; each is a three-byte UTF-8 sequence, so use three " " to replace each one, to preserve the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content);
content.clear();
content.squeeze();