From f6c928c8e1f6ad8a80b5a244ae04eca825450f5b Mon Sep 17 00:00:00 2001 From: iaom Date: Mon, 24 Jul 2023 11:39:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=87=E4=BB=B6=E7=B4=A2?= =?UTF-8?q?=E5=BC=95=E4=B8=AD=E9=83=A8=E5=88=86=E4=B8=AD=E6=96=87=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=E5=9C=A8term=E4=B8=AD=E8=A2=AB=E6=88=AA=E6=96=AD?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libsearch/common.h | 5 ++++- libsearch/index/basic-indexer.cpp | 16 +++++++++++----- libsearch/index/search-manager.cpp | 11 ++++++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/libsearch/common.h b/libsearch/common.h index dc7c0a3..fea6b31 100644 --- a/libsearch/common.h +++ b/libsearch/common.h @@ -19,7 +19,10 @@ static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem"); static const int OCR_MIN_SIZE = 200; static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings"); static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable"); -static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.0"); +/** + * changelog 1.0.1 修复部分中文字符在term中被截断的问题 + */ +static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1"); /** * changelog 1.1.0 增加文件修改时间value */ diff --git a/libsearch/index/basic-indexer.cpp b/libsearch/index/basic-indexer.cpp index c4625fe..e96b0b6 100644 --- a/libsearch/index/basic-indexer.cpp +++ b/libsearch/index/basic-indexer.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "file-utils.h" using namespace UkuiSearch; BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath) @@ -48,14 +49,19 @@ bool BasicIndexer::index() QString indexName = info.fileName().toLower(); QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName()); - int i = 0; int postingCount = 1; //terms post of Xapian document is started from 1! - while(postingCount <= indexName.size()) { - m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount); - ++postingCount; - ++i; + int start = 0; + QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, indexName); + for(; bf.position() != -1; bf.toNextBoundary()) { + int end = bf.position(); + if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) { + m_document.addPosting(QUrl::toPercentEncoding(indexName.mid(start, end - start)).toStdString(), postingCount); + ++postingCount; + } + start = end; } + for(QString& s : pinyinTextList) { i = 0; while(i < s.size()) { diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index ed15ad3..1118aea 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -165,9 +165,14 @@ int FileSearch::keywordSearchfile() { Xapian::Query FileSearch::creatQueryForFileSearch() { auto userInput = m_keyword.toLower(); std::vector v; - for(int i = 0; i < userInput.size(); i++) { - v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString())); - // qDebug()<