diff --git a/libsearch/file-utils.cpp b/libsearch/file-utils.cpp index 1014a25..24f6dd9 100644 --- a/libsearch/file-utils.cpp +++ b/libsearch/file-utils.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -782,47 +783,192 @@ QString FileUtils::escapeHtml(const QString &str) return temp; } -QString FileUtils::chineseSubString(const std::string &myStr, uint start, uint length, const QString &keyword) +QString FileUtils::getSnippet(const std::string &myStr, uint start, const QString &keyword) { - std::string afterSub = ""; - QString sub = QString::fromStdString(myStr); + QFont boldFont(qApp->font().family()); + boldFont.setPointSizeF(qApp->font().pointSizeF() + 2); + boldFont.setWeight(QFont::Bold); + QFontMetricsF boldMetricsF(boldFont); - if (length >= myStr.length()) { - afterSub = myStr.substr(start,length); //截取; - if (horizontalAdvanceContainsKeyword(QString::fromStdString(afterSub), keyword) >= 2*LABEL_MAX_WIDTH) { - sub = QString::fromStdString(afterSub); - } - return wrapData(sub, keyword); - } + uint strLength = 240; + bool elideLeft(false); + std::string sub = myStr.substr(start, strLength); + QString content = QString::fromStdString(sub); - //从关键字截length个字,文本内容长度够截 - if (start + length <= myStr.length()) { - afterSub = myStr.substr(start,length); //截取 - sub = QString::fromStdString(afterSub); //转QString + //不够截往前补 + if (start + strLength > myStr.length()) { + //新的起始位置 + int newStart = myStr.length() - strLength; - if(start + length < myStr.length()){ - sub.replace(sub.length() - 1, 1, "…"); //最后一位可能为乱码,替换掉 - } - sub = wrapData(sub, keyword); - } else { - uint newStart = myStr.length() - length; //从start截到末尾长度不够length,更新截取位置到末尾前length个字的位置 - afterSub = myStr.substr(newStart, length); - sub = QString::fromStdString(afterSub); - - if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(newStart, start)), keyword) >= 2*LABEL_MAX_WIDTH) { - sub = wrapData(sub.replace(0, 1, "…"), keyword, true); + if (myStr.length() < strLength) { + newStart = 0; + sub = myStr; } else { - if (newStart + 3 < start) { - sub.replace(0, 1, "…")/*.append("…")*/; //第一个字有可能乱码,直接替换 + sub = myStr.substr(newStart, strLength); + } + + if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(newStart, start)) + boldMetricsF.horizontalAdvance(keyword), keyword) > 2 * LABEL_MAX_WIDTH) { + if (horizontalAdvanceContainsKeyword(QString::fromStdString(myStr.substr(start)), keyword) <= 2 * LABEL_MAX_WIDTH) { + elideLeft = true; } else { - afterSub = myStr.substr(start, length); //需要往前补三位以内说明:补的全是乱码,直接从start截就完了 - sub = "…" + QString::fromStdString(afterSub); -// sub.append("…"); + sub = myStr.substr(start); } - sub = wrapData(sub, keyword); + } + content = QString::fromStdString(sub); + } + + QFont font(qApp->font().family()); + font.setPointSizeF(qApp->font().pointSizeF()); + QFontMetricsF fontMetricsF(font); + + qreal blockLength = 0; + qreal total = 0; + int lineCount = 0; + int normalLength = 0; + int boldLength = 0; + + QString snippet; + int boundaryStart = 0; + int boundaryEnd = 0; + QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content); + + if (!elideLeft) { + for (;fm.position() != -1;fm.toNextBoundary()) { + boundaryEnd = fm.position(); + QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart); + if (boundaryStart == boundaryEnd) { + continue; + } + + if (keyword.toUpper().contains(word.toUpper())) { + if (normalLength) { + total += fontMetricsF.horizontalAdvance(content.mid(boundaryStart - normalLength, normalLength)); + normalLength = 0; + blockLength = 0; + } + boldLength += (boundaryEnd - boundaryStart); + blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryEnd - boldLength, boldLength)); + } else { + if (boldLength) { + total += boldMetricsF.horizontalAdvance(content.mid(boundaryStart - boldLength, boldLength)); + boldLength = 0; + blockLength = 0; + } + normalLength += (boundaryEnd - boundaryStart); + blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryEnd - normalLength, normalLength)); + + } + + if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) { + if (total + blockLength > LABEL_MAX_WIDTH) { + fm.toPreviousBoundary(); + snippet.append("\n"); + } else { + snippet.append(word).append("\n"); + boundaryStart = boundaryEnd; + } + normalLength = 0; + boldLength = 0; + lineCount++; + total = 0; + blockLength = 0; + continue; + } else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) { + qreal distance = 0; + qreal wordSize = 0; + if (total + blockLength > LABEL_MAX_WIDTH) { + boundaryEnd = boundaryStart; + fm.toPreviousBoundary(); + } else { + snippet.append(word); + } + while (wordSize < fontMetricsF.horizontalAdvance("…")) { + boundaryStart = fm.position(); + + wordSize += keyword.toUpper().contains(content.mid(boundaryStart, boundaryEnd - boundaryStart).toUpper()) ? + boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart)) + : fontMetricsF.horizontalAdvance(content.mid(boundaryStart, boundaryEnd - boundaryStart)); + distance += (boundaryEnd - boundaryStart); + boundaryEnd = boundaryStart; + fm.toPreviousBoundary(); + } + snippet = snippet.left(snippet.size() - distance); + snippet.append("…"); + break; + } + snippet.append(word); + boundaryStart = boundaryEnd; + } + } else { + boundaryEnd = content.size(); + for (fm.toEnd(); fm.position() != -1; fm.toPreviousBoundary()) { + boundaryStart = fm.position(); + if (boundaryEnd == boundaryStart) { + continue; + } + + QString word = content.mid(boundaryStart, boundaryEnd - boundaryStart); + if (keyword.toUpper().contains(word.toUpper())) { + if (normalLength) { + total += fontMetricsF.horizontalAdvance(content.mid(boundaryEnd, normalLength)); + normalLength = 0; + blockLength = 0; + } + boldLength += (boundaryEnd - boundaryStart); + blockLength = boldMetricsF.horizontalAdvance(content.mid(boundaryStart, boldLength)); + } else { + if (boldLength) { + total += boldMetricsF.horizontalAdvance(content.mid(boundaryEnd, boldLength)); + boldLength = 0; + blockLength = 0; + } + normalLength += (boundaryEnd - boundaryStart); + blockLength = fontMetricsF.horizontalAdvance(content.mid(boundaryStart, normalLength)); + + } + + if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 0) { + if (total + blockLength > LABEL_MAX_WIDTH) { + fm.toNextBoundary(); + snippet.prepend("\n"); + } else { + snippet.prepend(word).prepend("\n"); + boundaryStart = boundaryEnd; + } + normalLength = 0; + boldLength = 0; + lineCount++; + total = 0; + blockLength = 0; + continue; + } else if (total + blockLength >= LABEL_MAX_WIDTH && lineCount == 1) { + qreal distance = 0; + qreal wordSize = 0; + if (total + blockLength > LABEL_MAX_WIDTH) { + boundaryStart = boundaryEnd; + fm.toNextBoundary(); + } else { + snippet.prepend(word); + } + while (wordSize < fontMetricsF.horizontalAdvance("…")) { + boundaryEnd = fm.position(); + QString firstLetter = content.mid(boundaryStart, boundaryEnd - boundaryStart); + wordSize += keyword.toUpper().contains(firstLetter.toUpper()) ? + boldMetricsF.horizontalAdvance(firstLetter) : fontMetricsF.horizontalAdvance(firstLetter); + distance += (boundaryEnd - boundaryStart); + boundaryStart = boundaryEnd; + fm.toNextBoundary(); + } + snippet = snippet.right(snippet.size() - distance); + snippet.prepend("…"); + break; + } + snippet.prepend(word); + boundaryEnd = boundaryStart; } } - return sub; + + return snippet; } QIcon FileUtils::iconFromTheme(const QString &name, const QIcon &iconDefault) @@ -932,21 +1078,30 @@ QString FileUtils::getHtmlText(const QString &text, const QString &keyword) "}" "").arg(qApp->font().pointSizeF() + 2); bool boldOpenned = false; - for(int i = 0; i < text.length(); i++) { - if((keyword.toUpper()).contains(QString(text.at(i)).toUpper())) { + + QTextBoundaryFinder bf(QTextBoundaryFinder::Grapheme, text); + int start = 0; + for (;bf.position() != -1; bf.toNextBoundary()) { + int end = bf.position(); + if (end == start) { + continue; + } + if (keyword.toUpper().contains(text.mid(start, end - start).toUpper())) { if(! boldOpenned) { boldOpenned = true; htmlString.append(QString("")); } - htmlString.append(FileUtils::escapeHtml(QString(text.at(i)))); + htmlString.append(FileUtils::escapeHtml(text.mid(start, end - start))); } else { if(boldOpenned) { boldOpenned = false; htmlString.append(QString("")); } - htmlString.append(FileUtils::escapeHtml(QString(text.at(i)))); + htmlString.append(FileUtils::escapeHtml(text.mid(start, end - start))); } + start = end; } + htmlString.replace("\n", "
");//替换换行符 return "
" + htmlString + "
"; } @@ -986,176 +1141,6 @@ QString FileUtils::wrapData(QLabel *p_label, const QString &text) } } } -// p_label->setText(wrapText); - return wrapText; -} - -QString FileUtils::wrapData(const QString &text, const QString &keyword, bool elideLeft) -{ - QString wrapText = text; - - QFont boldFont(qApp->font().family()); - boldFont.setPointSizeF(qApp->font().pointSizeF() + 2); - boldFont.setWeight(QFont::Bold); - QFontMetricsF boldMetricsF(boldFont); - - QFont font(qApp->font().family()); - font.setPointSizeF(qApp->font().pointSizeF()); - QFontMetricsF fontMetricsF(font); - qreal blockLength = 0; - qreal total = 0; - int lineCount = 0; - int normalLength = 0; - int boldLength = 0; - - if (elideLeft) { - for (int i = text.length() - 1; i >= 0; i--) { - if (keyword.toUpper().contains(text.at(i).toUpper())) { - if (normalLength) { - total += fontMetricsF.horizontalAdvance(text.mid(i + 1, normalLength)); - normalLength = 0; - blockLength = 0; - } - if (boldLength) { - blockLength = boldMetricsF.horizontalAdvance(text.mid(i + 1, boldLength)); - } - boldLength++; - } else { - if (boldLength) { - total += boldMetricsF.horizontalAdvance(text.mid(i + 1, boldLength)); - boldLength = 0; - blockLength = 0; - } - - if (normalLength) { - blockLength = fontMetricsF.horizontalAdvance(text.mid(i + 1, normalLength)); - } - normalLength++; - } - - if (!i) { - if (normalLength) { - blockLength = fontMetricsF.horizontalAdvance(text.left(normalLength)); - } - if (boldLength) { - blockLength = boldMetricsF.horizontalAdvance(text.left(boldLength)); - } - } - - if (total + blockLength >= LABEL_MAX_WIDTH) { - i++; - if (total + blockLength > LABEL_MAX_WIDTH) { - if (normalLength) { - normalLength = 1; - } else { - boldLength = 1; - } - } else { - normalLength = 0; - boldLength = 0; - } - wrapText.insert(i + 1, '\n'); - lineCount++; - total = 0; - blockLength = 0; - } - - if (lineCount == 2) { - QString leftWord = text.left(i + 1); - if (!leftWord.isEmpty()) { - qreal distance = 2;//2是换行符加上要换第一个字 - qreal wordSize = 0; - for (int index = i + 1; index < text.length(); index++) { - wordSize += keyword.toUpper().contains(text.at(index).toUpper()) ? - boldMetricsF.horizontalAdvance(text.at(index)) : fontMetricsF.horizontalAdvance(text.at(index)); - if (wordSize < fontMetricsF.horizontalAdvance("…")) { - distance++;//字长度比省略号要小,可能会挡上,所以再截一个字 - } else { - break; - } - } - wrapText = wrapText.right(wrapText.size() - leftWord.size() - distance); - wrapText.prepend("…"); - } - break; - } - } - } else { - for (int i = 0; i <= text.length(); i++) { - if (i == text.length()) { - if (normalLength) { - blockLength = fontMetricsF.horizontalAdvance(text.right(normalLength)); - } - if (boldLength) { - blockLength = boldMetricsF.horizontalAdvance(text.right(boldLength)); - } - } else { - if (keyword.toUpper().contains(text.at(i).toUpper())) { - if (normalLength) { - total += fontMetricsF.horizontalAdvance(text.mid(i - normalLength, normalLength)); - normalLength = 0; - blockLength = 0; - } - if (boldLength) { - blockLength = boldMetricsF.horizontalAdvance(text.mid(i - boldLength, boldLength)); - } - boldLength++; - } else { - if (boldLength) { - total += boldMetricsF.horizontalAdvance(text.mid(i - boldLength, boldLength)); - boldLength = 0; - blockLength = 0; - } - - if (normalLength) { - blockLength = fontMetricsF.horizontalAdvance(text.mid(i - normalLength, normalLength)); - } - normalLength++; - } - } - - if (total + blockLength >= LABEL_MAX_WIDTH) { - i--; - if (total + blockLength > LABEL_MAX_WIDTH) { - wrapText.insert(i + lineCount, '\n'); - if (normalLength) { - normalLength = 1; - } else { - boldLength = 1; - } - } else { - wrapText.insert(i + 1 + lineCount, '\n'); - normalLength = 0; - boldLength = 0; - } - - lineCount++; - total = 0; - blockLength = 0; - } - - if (lineCount == 2) { - QString leftWord = text.mid(i); - if (!leftWord.isEmpty()) { - qreal distance = 2; - qreal wordSize = 0; - for (int index = i; index > 0; index--) { - wordSize += keyword.toUpper().contains(text.at(index).toUpper()) ? - boldMetricsF.horizontalAdvance(text.at(index)) : fontMetricsF.horizontalAdvance(text.at(index)); - if (wordSize < fontMetricsF.horizontalAdvance("…")) { - distance++; - } else { - break; - } - } - wrapText = wrapText.left(wrapText.size() - leftWord.size() - distance); - wrapText.append("…"); - } - break; - } - } - } - return wrapText; } @@ -1169,25 +1154,37 @@ qreal FileUtils::horizontalAdvanceContainsKeyword(const QString &content, const QFont font(qApp->font().family()); font.setPointSizeF(qApp->font().pointSizeF()); QFontMetricsF fontMetricsF(font); + + QTextBoundaryFinder fm(QTextBoundaryFinder::Grapheme, content); + int start = 0; + qreal contentSize = 0; int boldLength = 0; int normalLength = 0; - for (int i = 0; i < content.length(); i++) { - if (keyword.toUpper().contains(content.at(i).toUpper())) { - boldLength++; + for (;fm.position() != -1;fm.toNextBoundary()) { + int end = fm.position(); + if (end == start) { + continue; + } + QString letter = content.mid(start, end - start); + + if (keyword.toUpper().contains(letter.toUpper())) { if (normalLength) { - contentSize += boldMetricsF.horizontalAdvance(content.mid(i - normalLength, normalLength)); + contentSize += fontMetricsF.horizontalAdvance(content.mid(start - normalLength, normalLength)); normalLength = 0; } + boldLength += (end - start); } else { - normalLength++; if (boldLength) { - contentSize += boldMetricsF.horizontalAdvance(content.mid(i - boldLength, boldLength)); + contentSize += boldMetricsF.horizontalAdvance(content.mid(start - boldLength, boldLength)); boldLength = 0; } + normalLength += (end - start); } + start = end; } + if (boldLength) { contentSize += boldMetricsF.horizontalAdvance(content.right(boldLength)); } diff --git a/libsearch/file-utils.h b/libsearch/file-utils.h index 60aaaed..64ea928 100644 --- a/libsearch/file-utils.h +++ b/libsearch/file-utils.h @@ -34,7 +34,6 @@ public: static QString getHtmlText(const QString &text, const QString &keyword); static QString setAllTextBold(const QString &name); static QString wrapData(QLabel *p_label, const QString &text); - static QString wrapData(const QString &text, const QString &keyword, bool elideLeft = false); static qreal horizontalAdvanceContainsKeyword(const QString &content, const QString &keyword); static std::string makeDocUterm(QString path); static QIcon getFileIcon(const QString &uri, bool checkValid = true); @@ -64,7 +63,7 @@ public: static int openFile(QString &path, bool openInDir = false); static bool copyPath(QString &path); static QString escapeHtml(const QString &str); - static QString chineseSubString(const std::string &myStr, uint start, uint length, const QString &keyword); + static QString getSnippet(const std::string &myStr, uint start, const QString &keyword); static QIcon iconFromTheme(const QString &name, const QIcon &iconDefault); static bool isOpenXMLFileEncrypted(const QString &path); /** diff --git a/libsearch/index/search-manager.cpp b/libsearch/index/search-manager.cpp index 805013a..4054d37 100644 --- a/libsearch/index/search-manager.cpp +++ b/libsearch/index/search-manager.cpp @@ -355,7 +355,7 @@ int FileContentSearch::getResult(Xapian::MSet &result, std::string &keyWord) { } } auto pos = termIterator.positionlist_begin(); - QString snippet = FileUtils::chineseSubString(data, *pos, 120, QString::fromStdString(keyWord).remove(" ")); + QString snippet = FileUtils::getSnippet(data, *pos, QString::fromStdString(keyWord).remove(" ")); ri.description.prepend(SearchPluginIface::DescriptionInfo{"",FileUtils::getHtmlText(snippet, QString::fromStdString(keyWord).remove(" "))}); QString().swap(snippet); @@ -472,7 +472,7 @@ int OcrSearch::getResult(Xapian::MSet &result, std::string &keyWord) { term.skip_to(wordTobeFound); //fix me: make a snippet without cut cjk char. auto pos = term.positionlist_begin(); - QString snippet = FileUtils::chineseSubString(data, *pos, 120, QString::fromStdString(keyWord).remove(" ")); + QString snippet = FileUtils::getSnippet(data, *pos, QString::fromStdString(keyWord).remove(" ")); ri.description.prepend(SearchPluginIface::DescriptionInfo{"", FileUtils::getHtmlText(snippet, QString::fromStdString(keyWord).remove(" "))}); QString().swap(snippet);