修复文件索引中部分中文字符在term中被截断的问题

This commit is contained in:
iaom 2023-07-24 11:39:18 +08:00
parent c0c634f980
commit 637e71e277
3 changed files with 23 additions and 9 deletions

View File

@ -19,7 +19,10 @@ static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
static const int OCR_MIN_SIZE = 200;
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable");
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.0");
/**
* changelog 1.0.1 修复部分中文字符在term中被截断的问题
*/
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1");
/**
* changelog 1.1.0 value
*/

View File

@ -21,6 +21,7 @@
#include <QDateTime>
#include <QFileInfo>
#include <QUrl>
#include <QTextBoundaryFinder>
#include "file-utils.h"
using namespace UkuiSearch;
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
@ -48,14 +49,19 @@ bool BasicIndexer::index()
QString indexName = info.fileName().toLower();
QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName());
int i = 0;
int postingCount = 1; //terms post of Xapian document is started from 1!
while(postingCount <= indexName.size()) {
m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount);
++postingCount;
++i;
int start = 0;
QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, indexName);
for(; bf.position() != -1; bf.toNextBoundary()) {
int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
m_document.addPosting(QUrl::toPercentEncoding(indexName.mid(start, end - start)).toStdString(), postingCount);
++postingCount;
}
start = end;
}
for(QString& s : pinyinTextList) {
i = 0;
while(i < s.size()) {

View File

@ -165,9 +165,14 @@ int FileSearch::keywordSearchfile() {
Xapian::Query FileSearch::creatQueryForFileSearch() {
auto userInput = m_keyword.toLower();
std::vector<Xapian::Query> v;
for(int i = 0; i < userInput.size(); i++) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, userInput);
int start = 0;
for(; bf.position() != -1; bf.toNextBoundary()) {
int end = bf.position();
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString()));
}
start = end;
}
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
return queryPhrase;