中文分词接口统一处理;

This commit is contained in:
jixiaoxu 2023-03-20 15:21:58 +08:00
parent 308b9b642f
commit 0725b5b097
5 changed files with 8 additions and 9 deletions

@ -1 +1 @@
Subproject commit 124c661581e0f2e10a47d3cc237f7ade404f1bec
Subproject commit bf085a4a188010fac212e0f80e133c777857552b

View File

@ -41,9 +41,8 @@ bool fileContentIndexer::index()
}
m_document.setData(content);
//'\xEF\xBC\x8C' is "，", "\xE3\x80\x82" is "。"; each is replaced with three " " to preserve the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " ");
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000));
content.clear();
content.squeeze();

View File

@ -274,7 +274,7 @@ int FileContentSearch::keywordSearchContent() {
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Create a query
std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) {
@ -414,7 +414,7 @@ int OcrSearch::keywordSearchOcr() {
Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Create a query
std::string words;
for(int i = 0; i < sKeyWord.size(); i++) {

View File

@ -180,7 +180,7 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
}
void NoteSearch::run() {
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString());
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword);
QStringList keywordList;
for (KeyWord sKeyWord : sKeyWordVec) {
keywordList.append(QString::fromStdString(sKeyWord.word));

View File

@ -179,8 +179,8 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
{
std::vector<Xapian::Query> v;
for (const auto &keyword : m_searchController->getKeyword()) {
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString());
for (auto &keyword : m_searchController->getKeyword()) {
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword);
for(const auto & c : sKeyWord) {
v.emplace_back(c.word);