修复文件索引中部分中文字符在term中被截断的问题
This commit is contained in:
parent
fe3feb68e3
commit
f6c928c8e1
|
@ -19,7 +19,10 @@ static const QString INDEX_SEM = QStringLiteral("ukui-search-index-sem");
|
||||||
static const int OCR_MIN_SIZE = 200;
|
static const int OCR_MIN_SIZE = 200;
|
||||||
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
|
static const QByteArray UKUI_SEARCH_SCHEMAS = QByteArrayLiteral("org.ukui.search.settings");
|
||||||
static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable");
|
static const QString SEARCH_METHOD_KEY = QStringLiteral("fileIndexEnable");
|
||||||
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.0");
|
/**
|
||||||
|
* changelog 1.0.1 修复部分中文字符在term中被截断的问题
|
||||||
|
*/
|
||||||
|
static const QString INDEX_DATABASE_VERSION = QStringLiteral("1.0.1");
|
||||||
/**
|
/**
|
||||||
* changelog 1.1.0 增加文件修改时间value
|
* changelog 1.1.0 增加文件修改时间value
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include <QDateTime>
|
#include <QDateTime>
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
|
#include <QTextBoundaryFinder>
|
||||||
#include "file-utils.h"
|
#include "file-utils.h"
|
||||||
using namespace UkuiSearch;
|
using namespace UkuiSearch;
|
||||||
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
|
BasicIndexer::BasicIndexer(const QString& filePath): m_filePath(filePath)
|
||||||
|
@ -48,14 +49,19 @@ bool BasicIndexer::index()
|
||||||
|
|
||||||
QString indexName = info.fileName().toLower();
|
QString indexName = info.fileName().toLower();
|
||||||
QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName());
|
QStringList pinyinTextList = FileUtils::findMultiToneWords(info.fileName());
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int postingCount = 1; // term positions in a Xapian document start from 1!
|
int postingCount = 1; // term positions in a Xapian document start from 1!
|
||||||
while(postingCount <= indexName.size()) {
|
int start = 0;
|
||||||
m_document.addPosting(QUrl::toPercentEncoding(indexName.at(i)).toStdString(), postingCount);
|
QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, indexName);
|
||||||
++postingCount;
|
for(; bf.position() != -1; bf.toNextBoundary()) {
|
||||||
++i;
|
int end = bf.position();
|
||||||
|
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
|
||||||
|
m_document.addPosting(QUrl::toPercentEncoding(indexName.mid(start, end - start)).toStdString(), postingCount);
|
||||||
|
++postingCount;
|
||||||
|
}
|
||||||
|
start = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(QString& s : pinyinTextList) {
|
for(QString& s : pinyinTextList) {
|
||||||
i = 0;
|
i = 0;
|
||||||
while(i < s.size()) {
|
while(i < s.size()) {
|
||||||
|
|
|
@ -165,9 +165,14 @@ int FileSearch::keywordSearchfile() {
|
||||||
Xapian::Query FileSearch::creatQueryForFileSearch() {
|
Xapian::Query FileSearch::creatQueryForFileSearch() {
|
||||||
auto userInput = m_keyword.toLower();
|
auto userInput = m_keyword.toLower();
|
||||||
std::vector<Xapian::Query> v;
|
std::vector<Xapian::Query> v;
|
||||||
for(int i = 0; i < userInput.size(); i++) {
|
QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, userInput);
|
||||||
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.at(i)).toStdString()));
|
int start = 0;
|
||||||
// qDebug()<<QString::fromStdString(Xapian::Query(QString(userInput.at(i)).toStdString()).get_description());
|
for(; bf.position() != -1; bf.toNextBoundary()) {
|
||||||
|
int end = bf.position();
|
||||||
|
if(bf.boundaryReasons() & QTextBoundaryFinder::EndOfItem) {
|
||||||
|
v.push_back(Xapian::Query(QUrl::toPercentEncoding(userInput.mid(start, end - start)).toStdString()));
|
||||||
|
}
|
||||||
|
start = end;
|
||||||
}
|
}
|
||||||
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
|
Xapian::Query queryPhrase = Xapian::Query(Xapian::Query::OP_PHRASE, v.begin(), v.end());
|
||||||
return queryPhrase;
|
return queryPhrase;
|
||||||
|
|
Loading…
Reference in New Issue