Merge pull request #249 from iaom/0531-dev-main

[FIX]: Offset info error in keyword extraction.
This commit is contained in:
纪老师 2021-05-31 16:38:54 +08:00 committed by GitHub
commit 83ff5325f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 1 deletion

View File

@@ -116,7 +116,8 @@ void ConstructDocumentForContent::run() {
doc.addTerm(upTerm); doc.addTerm(upTerm);
doc.addValue(m_path); doc.addValue(m_path);
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); // "\xEF\xBC\x8C" is "，" and "\xE3\x80\x82" is "。"; each is replaced with three spaces (one per UTF-8 byte) to preserve the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000)); // QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
//修改函数返回类型修改入参为std::string引用--jxx20210519 //修改函数返回类型修改入参为std::string引用--jxx20210519