修复文件索引中部分中文字符在term中被截断的问题

This commit is contained in:
iaom 2023-07-24 11:39:18 +08:00
parent fe3feb68e3
commit f6c928c8e1
3 changed files with 23 additions and 9 deletions

View File

@ -19,7 +19,10 @@ static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
static const int OCR_MIN_SIZE = 200; static const int OCR_MIN_SIZE = 200;
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings"); static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable"); static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable");
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.0"); /**
* changelog 1.0.1 修复文件索引中部分中文字符在term中被截断的问题
*/
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1");
/** /**
* changelog 1.1.0 value * changelog 1.1.0 value
*/ */

View File

@ -21,6 +21,7 @@
#include <QDateTime> #include <QDateTime>
#include <QFileInfo> #include <QFileInfo>
#include <QUrl> #include <QUrl>
#include <QTextBoundaryFinder>
#include "file-utils.h" #include "file-utils.h"
using namespace UkuiSearch; using namespace UkuiSearch;
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath) BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
@ -48,14 +49,19 @@ bool BasicIndexer::index()
QString indexName = info.fileName().toLower(); QString indexName = info.fileName().toLower();
QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName()); QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName());
int i = 0; int i = 0;
int postingCount = 1; //terms post of Xapian document is started from 1! int postingCount = 1; //terms post of Xapian document is started from 1!
while(postingCount <= indexName.size()) { int start = 0;
m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount); QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, indexName);
++postingCount; for(; bf.position() != -1; bf.toNextBoundary()) {
++i; int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
m_document.addPosting(QUrl::toPercentEncoding(indexName.mid(start, end - start)).toStdString(), postingCount);
++postingCount;
}
start = end;
} }
for(QString& s : pinyinTextList) { for(QString& s : pinyinTextList) {
i = 0; i = 0;
while(i < s.size()) { while(i < s.size()) {

View File

@ -165,9 +165,14 @@ int FileSearch::keywordSearchfile() {
Xapian::Query FileSearch::creatQueryForFileSearch() { Xapian::Query FileSearch::creatQueryForFileSearch() {
auto userInput = m_keyword.toLower(); auto userInput = m_keyword.toLower();
std::vector<Xapian::Query> v; std::vector<Xapian::Query> v;
for(int i = 0; i < userInput.size(); i++) { QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, userInput);
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString())); int start = 0;
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description()); for(; bf.position() != -1; bf.toNextBoundary()) {
int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString()));
}
start = end;
} }
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end()); Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
return queryPhrase; return queryPhrase;