中文分词接口统一处理;

This commit is contained in:
jixiaoxu 2023-03-20 15:21:58 +08:00
parent 308b9b642f
commit 0725b5b097
5 changed files with 8 additions and 9 deletions

@ -1 +1 @@
Subproject commit 124c661581e0f2e10a47d3cc237f7ade404f1bec Subproject commit bf085a4a188010fac212e0f80e133c777857552b

View File

@ -41,9 +41,8 @@ bool fileContentIndexer::index()
} }
m_document.setData(content); m_document.setData(content);
// "\xEF\xBC\x8C" is "," and "\xE3\x80\x82" is "。"; replace each with three " " to preserve the offset info.
content = content.replace("\t", " ").replace("\xEF\xBC\x8C", " ").replace("\xE3\x80\x82", " "); std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000));
std::vector<KeyWord> term = ChineseSegmentation::getInstance()->callSegment(content.left(20480000).toStdString());
content.clear(); content.clear();
content.squeeze(); content.squeeze();

View File

@ -274,7 +274,7 @@ int FileContentSearch::keywordSearchContent() {
qp.set_default_op(Xapian::Query::OP_AND); qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db); qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString()); std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Create a query //Create a query
std::string words; std::string words;
for(size_t i = 0; i < sKeyWord.size(); i++) { for(size_t i = 0; i < sKeyWord.size(); i++) {
@ -414,7 +414,7 @@ int OcrSearch::keywordSearchOcr() {
Xapian::QueryParser qp; Xapian::QueryParser qp;
qp.set_default_op(Xapian::Query::OP_AND); qp.set_default_op(Xapian::Query::OP_AND);
qp.set_database(db); qp.set_database(db);
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString()); std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(m_keyword);
//Create a query //Create a query
std::string words; std::string words;
for(int i = 0; i < sKeyWord.size(); i++) { for(int i = 0; i < sKeyWord.size(); i++) {

View File

@ -180,7 +180,7 @@ NoteSearch::NoteSearch(DataQueue<SearchPluginIface::ResultInfo> *searchResult, c
} }
void NoteSearch::run() { void NoteSearch::run() {
std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword.toStdString()); std::vector<KeyWord> sKeyWordVec = ChineseSegmentation::getInstance()->callSegment(m_keyword);
QStringList keywordList; QStringList keywordList;
for (KeyWord sKeyWord : sKeyWordVec) { for (KeyWord sKeyWord : sKeyWordVec) {
keywordList.append(QString::fromStdString(sKeyWord.word)); keywordList.append(QString::fromStdString(sKeyWord.word));

View File

@ -179,8 +179,8 @@ inline Xapian::Query FileContentSearchWorker::createQuery()
{ {
std::vector<Xapian::Query> v; std::vector<Xapian::Query> v;
for (const auto &keyword : m_searchController->getKeyword()) { for (auto &keyword : m_searchController->getKeyword()) {
std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword.toStdString()); std::vector<KeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegment(keyword);
for(const auto & c : sKeyWord) { for(const auto & c : sKeyWord) {
v.emplace_back(c.word); v.emplace_back(c.word);