Fix: Index process crashed when parsing some WPS templates.

This commit is contained in:
iaom 2021-05-17 14:47:39 +08:00
parent acbc54699a
commit abce24773c
8 changed files with 13 additions and 15 deletions

View File

@ -52,10 +52,10 @@ ChineseSegmentation *ChineseSegmentation::getInstance() {
return global_instance_chinese_segmentation;
}
QVector<SKeyWord> ChineseSegmentation::callSegement(QString str) {
std::string s;
s = str.toStdString();
str.squeeze();
QVector<SKeyWord> ChineseSegmentation::callSegement(std::string s) {
// std::string s;
// s = str.toStdString();
// str.squeeze();
const size_t topk = -1;
std::vector<cppjieba::KeywordExtractor::Word> keywordres;

View File

@ -47,7 +47,7 @@ class CHINESESEGMENTATION_EXPORT ChineseSegmentation {
public:
static ChineseSegmentation *getInstance();
~ChineseSegmentation();
QVector<SKeyWord> callSegement(QString str);
QVector<SKeyWord> callSegement(std::string s);
void convert(std::vector<cppjieba::KeywordExtractor::Word>& keywordres, QVector<SKeyWord>& kw);
private:
static QMutex m_mutex;

View File

@ -111,7 +111,7 @@ void ConstructDocumentForContent::run() {
QString uniqueterm = QString::fromStdString(FileUtils::makeDocUterm(m_path));
QString upTerm = QString::fromStdString(FileUtils::makeDocUterm(m_path.section("/", 0, -2, QString::SectionIncludeLeadingSep)));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000));
QVector<SKeyWord> term = ChineseSegmentation::getInstance()->callSegement(content.left(20480000).toStdString());
Document doc;
doc.setData(content);

View File

@ -20,7 +20,7 @@
#include "document.h"
#include <QDebug>
using namespace Zeeker;
void Document::setData(QString data) {
void Document::setData(QString &data) {
if(data.isEmpty())
return;
m_document.set_data(data.toStdString());

View File

@ -39,7 +39,7 @@ public:
m_index_text = other.m_index_text;
m_unique_term = other.m_unique_term;
}
void setData(QString data);
void setData(QString &data);
void addPosting(std::string term, QVector<size_t> offset, int weight = 1);
void addPosting(std::string term, unsigned int offset, int weight = 1);
void addTerm(QString term);

View File

@ -354,7 +354,7 @@ Document IndexGenerator::GenerateContentDocument(const QString &path) {
QString upTerm;
FileReader::getTextContent(path, content);
term = ChineseSegmentation::getInstance()->callSegement(content);
term = ChineseSegmentation::getInstance()->callSegement(content.toStdString());
// QStringList term = content.split("");
doc.setData(content);

View File

@ -273,7 +273,7 @@ int FileContentSearch::keywordSearchContent() {
ret.erase(ret.begin(), ret.end());
::friso::ResultMap().swap(ret);
*/
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword);
QVector<SKeyWord> sKeyWord = ChineseSegmentation::getInstance()->callSegement(m_keyword.toStdString());
//Create a query
std::string words;
for(int i = 0; i < sKeyWord.size(); i++) {
@ -419,6 +419,7 @@ void DirectSearch::run() {
// QDir::Hidden
dir.setFilter(QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot);
dir.setSorting(QDir::DirsFirst);
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
while(!bfs.empty()) {
dir.setPath(bfs.dequeue());
list = dir.entryInfoList();
@ -426,8 +427,6 @@ void DirectSearch::run() {
if (i.isDir() && (!(i.isSymLink()))) {
bool findIndex = false;
QStringList blockList = GlobalSettings::getInstance()->getBlockDirs();
for (QString j : blockList) {
if (i.absoluteFilePath().startsWith(j.prepend("/"))) {
findIndex = true;

View File

@ -4457,15 +4457,14 @@ bool bReadBuffer(FILE *pFile, ULONG ulStartBlock,
ULONG ulBegin, ulIndex;
size_t tLen;
for(ulIndex = ulStartBlock;
ulIndex != END_OF_CHAIN && tToRead != 0;
ulIndex = aulBlockDepot[ulIndex]) {
for(ulIndex = ulStartBlock;ulIndex != END_OF_CHAIN && tToRead != 0;ulIndex = aulBlockDepot[ulIndex]) {
if(ulIndex >= (ULONG)tBlockDepotLen) {
if(tBlockSize >= BIG_BLOCK_SIZE) {
qWarning() << "The Big Block Depot is damaged";
} else {
qWarning() << "The Small Block Depot is damaged";
}
return (tToRead == 0);
}
if(ulOffset >= (ULONG)tBlockSize) {
ulOffset -= tBlockSize;