diff --git a/libsearch/index/file-content-indexer.cpp b/libsearch/index/file-content-indexer.cpp index ee488a4..dc989b5 100644 --- a/libsearch/index/file-content-indexer.cpp +++ b/libsearch/index/file-content-indexer.cpp @@ -41,9 +41,7 @@ bool fileContentIndexer::index() } m_document.setData(content); - //'\xEF\xBC\x8C' is "," "\xE3\x80\x82" is "。" use three " " to replace ,to ensure the offset info. - content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); - std::vector term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString()); + std::vector term = ChineseSegmentation::getInstance()->callSegment(content); content.clear(); content.squeeze();