Merge pull request #249 from iaom/0531-dev-main
[FIX]: Offset info error in keyword extraction.
This commit is contained in:
commit
83ff5325f6
|
@ -116,7 +116,8 @@ void ConstructDocumentForContent::run() {
|
||||||
doc.addTerm(upTerm);
|
doc.addTerm(upTerm);
|
||||||
doc.addValue(m_path);
|
doc.addValue(m_path);
|
||||||
|
|
||||||
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
// "\xEF\xBC\x8C" is the full-width comma "," and "\xE3\x80\x82" is the ideographic full stop "。"; each is 3 bytes in UTF-8, so replace each with three " " to keep the offset info correct.
|
||||||
|
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
|
||||||
|
|
||||||
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
// QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
|
||||||
//修改函数返回类型,修改入参为std::string引用--jxx20210519
|
//修改函数返回类型,修改入参为std::string引用--jxx20210519
|
||||||
|
|
Loading…
Reference in New Issue